repair/phase6.c

   1 /*
   2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
   3  * All Rights Reserved.
   4  *
   5  * This program is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU General Public License as
   7  * published by the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope that it would be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program; if not, write the Free Software Foundation,
  16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17  */
  18
  19 #include "libxfs.h"
  20 #include "threads.h"
  21 #include "prefetch.h"
  22 #include "avl.h"
  23 #include "globals.h"
  24 #include "agheader.h"
  25 #include "incore.h"
  26 #include "dir2.h"
  27 #include "protos.h"
  28 #include "err_protos.h"
  29 #include "dinode.h"
  30 #include "progress.h"
  31 #include "versions.h"
  32
  33 static struct cred              zerocr;
  34 static struct fsxattr           zerofsx;
  35 static xfs_ino_t                orphanage_ino;
  36
  37 static struct xfs_name          xfs_name_dot = {(unsigned char *)".",
  38                                                 1,
  39                                                 XFS_DIR3_FT_DIR};
  40
  41 /*
  42  * When we're checking directory inodes, we're allowed to set a directory's
  43  * dotdot entry to zero to signal that the parent needs to be reconnected
  44  * during phase 6.  If we're handling a shortform directory the ifork
  45  * verifiers will fail, so temporarily patch out this canary so that we can
  46  * verify the rest of the fork and move on to fixing the dir.
  47  */
  48 static xfs_failaddr_t
  49 phase6_verify_dir(
  50         struct xfs_inode                *ip)
  51 {
  52         struct xfs_mount                *mp = ip->i_mount;
  53         const struct xfs_dir_ops        *dops;
  54         struct xfs_ifork                *ifp;
  55         struct xfs_dir2_sf_hdr          *sfp;
  56         xfs_failaddr_t                  fa;
  57         xfs_ino_t                       old_parent;
  58         bool                            parent_bypass = false;
  59         int                             size;
  60
  61         dops = libxfs_dir_get_ops(mp, NULL);
  62
  63         ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
  64         sfp = (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data;
  65         size = ifp->if_bytes;
  66
  67         /*
  68          * If this is a shortform directory, phase4 may have set the parent
  69          * inode to zero to indicate that it must be fixed.  Temporarily
  70          * set a valid parent so that the directory verifier will pass.
  71          */
  72         if (size > offsetof(struct xfs_dir2_sf_hdr, parent) &&
  73             size >= xfs_dir2_sf_hdr_size(sfp->i8count)) {
  74                 old_parent = dops->sf_get_parent_ino(sfp);
  75                 if (old_parent == 0) {
  76                         dops->sf_put_parent_ino(sfp, mp->m_sb.sb_rootino);
  77                         parent_bypass = true;
  78                 }
  79         }
  80
  81         fa = libxfs_default_ifork_ops.verify_dir(ip);
  82
  83         /* Put it back. */
  84         if (parent_bypass)
  85                 dops->sf_put_parent_ino(sfp, old_parent);
  86
  87         return fa;
  88 }
  89
  90 static struct xfs_ifork_ops phase6_ifork_ops = {
  91         .verify_attr    = xfs_attr_shortform_verify,
  92         .verify_dir     = phase6_verify_dir,
  93         .verify_symlink = xfs_symlink_shortform_verify,
  94 };
  95
  96 /*
  97  * Data structures used to keep track of directories where the ".."
  98  * entries are updated. These must be rebuilt after the initial pass
  99  */
 100 typedef struct dotdot_update {
 101         struct list_head        list;
 102         ino_tree_node_t         *irec;
 103         xfs_agnumber_t          agno;
 104         int                     ino_offset;
 105 } dotdot_update_t;
 106
 107 static LIST_HEAD(dotdot_update_list);
 108 static int                      dotdot_update;
 109
 110 static void
 111 add_dotdot_update(
 112         xfs_agnumber_t          agno,
 113         ino_tree_node_t         *irec,
 114         int                     ino_offset)
 115 {
 116         dotdot_update_t         *dir = malloc(sizeof(dotdot_update_t));
 117
 118         if (!dir)
 119                 do_error(_("malloc failed add_dotdot_update (%zu bytes)\n"),
 120                         sizeof(dotdot_update_t));
 121
 122         INIT_LIST_HEAD(&dir->list);
 123         dir->irec = irec;
 124         dir->agno = agno;
 125         dir->ino_offset = ino_offset;
 126
 127         list_add(&dir->list, &dotdot_update_list);
 128 }
 129
 130 /*
 131  * Data structures and routines to keep track of directory entries
 132  * and whether their leaf entry has been seen. Also used for name
 133  * duplicate checking and rebuilding step if required.
 134  */
 135 typedef struct dir_hash_ent {
 136         struct dir_hash_ent     *nextbyaddr;    /* next in addr bucket */
 137         struct dir_hash_ent     *nextbyhash;    /* next in name bucket */
 138         struct dir_hash_ent     *nextbyorder;   /* next in order added */
 139         xfs_dahash_t            hashval;        /* hash value of name */
 140         uint32_t                address;        /* offset of data entry */
 141         xfs_ino_t               inum;           /* inode num of entry */
 142         short                   junkit;         /* name starts with / */
 143         short                   seen;           /* have seen leaf entry */
 144         struct xfs_name         name;
 145 } dir_hash_ent_t;
 146
 147 typedef struct dir_hash_tab {
 148         int                     size;           /* size of hash tables */
 149         int                     names_duped;    /* 1 = ent names malloced */
 150         dir_hash_ent_t          *first;         /* ptr to first added entry */
 151         dir_hash_ent_t          *last;          /* ptr to last added entry */
 152         dir_hash_ent_t          **byhash;       /* ptr to name hash buckets */
 153         dir_hash_ent_t          **byaddr;       /* ptr to addr hash buckets */
 154 } dir_hash_tab_t;
 155
 156 #define DIR_HASH_TAB_SIZE(n)    \
 157         (sizeof(dir_hash_tab_t) + (sizeof(dir_hash_ent_t *) * (n) * 2))
 158 #define DIR_HASH_FUNC(t,a)      ((a) % (t)->size)
 159
 160 /*
 161  * Track the contents of the freespace table in a directory.
 162  */
 163 typedef struct freetab {
 164         int                     naents; /* expected number of data blocks */
 165         int                     nents;  /* number of data blocks processed */
 166         struct freetab_ent {
 167                 xfs_dir2_data_off_t     v;
 168                 short                   s;
 169         } ents[1];
 170 } freetab_t;
 171 #define FREETAB_SIZE(n) \
 172         (offsetof(freetab_t, ents) + (sizeof(struct freetab_ent) * (n)))
 173
 174 #define DIR_HASH_CK_OK          0
 175 #define DIR_HASH_CK_DUPLEAF     1
 176 #define DIR_HASH_CK_BADHASH     2
 177 #define DIR_HASH_CK_NODATA      3
 178 #define DIR_HASH_CK_NOLEAF      4
 179 #define DIR_HASH_CK_BADSTALE    5
 180 #define DIR_HASH_CK_TOTAL       6
 181
 182 /*
 183  * Need to handle CRC and validation errors specially here. If there is a
 184  * validator error, re-read without the verifier so that we get a buffer we can
 185  * check and repair. Re-attach the ops to the buffer after the read so that when
 186  * it is rewritten the CRC is recalculated.
 187  *
 188  * If the buffer was not read, we return an error. If the buffer was read but
 189  * had a CRC or corruption error, we reread it without the verifier and if it is
 190  * read successfully we increment *crc_error and return 0. Otherwise we
 191  * return the read error.
 192  */
 193 static int
 194 dir_read_buf(
 195         struct xfs_inode        *ip,
 196         xfs_dablk_t             bno,
 197         xfs_daddr_t             mappedbno,
 198         struct xfs_buf          **bpp,
 199         const struct xfs_buf_ops *ops,
 200         int                     *crc_error)
 201 {
 202         int error;
 203         int error2;
 204
 205         error = -libxfs_da_read_buf(NULL, ip, bno, mappedbno, bpp,
 206                                    XFS_DATA_FORK, ops);
 207
 208         if (error != EFSBADCRC && error != EFSCORRUPTED)
 209                 return error;
 210
 211         error2 = -libxfs_da_read_buf(NULL, ip, bno, mappedbno, bpp,
 212                                    XFS_DATA_FORK, NULL);
 213         if (error2)
 214                 return error2;
 215
 216         (*crc_error)++;
 217         (*bpp)->b_ops = ops;
 218         return 0;
 219 }
 220
 221 /*
 222  * Returns 0 if the name already exists (ie. a duplicate)
 223  */
 224 static int
 225 dir_hash_add(
 226         xfs_mount_t             *mp,
 227         dir_hash_tab_t          *hashtab,
 228         uint32_t                addr,
 229         xfs_ino_t               inum,
 230         int                     namelen,
 231         unsigned char           *name,
 232         uint8_t                 ftype)
 233 {
 234         xfs_dahash_t            hash = 0;
 235         int                     byaddr;
 236         int                     byhash = 0;
 237         dir_hash_ent_t          *p;
 238         int                     dup;
 239         short                   junk;
 240         struct xfs_name         xname;
 241
 242         ASSERT(!hashtab->names_duped);
 243
 244         xname.name = name;
 245         xname.len = namelen;
 246         xname.type = ftype;
 247
 248         junk = name[0] == '/';
 249         byaddr = DIR_HASH_FUNC(hashtab, addr);
 250         dup = 0;
 251
 252         if (!junk) {
 253                 hash = mp->m_dirnameops->hashname(&xname);
 254                 byhash = DIR_HASH_FUNC(hashtab, hash);
 255
 256                 /*
 257                  * search hash bucket for existing name.
 258                  */
 259                 for (p = hashtab->byhash[byhash]; p; p = p->nextbyhash) {
 260                         if (p->hashval == hash && p->name.len == namelen) {
 261                                 if (memcmp(p->name.name, name, namelen) == 0) {
 262                                         dup = 1;
 263                                         junk = 1;
 264                                         break;
 265                                 }
 266                         }
 267                 }
 268         }
 269
 270         if ((p = malloc(sizeof(*p))) == NULL)
 271                 do_error(_("malloc failed in dir_hash_add (%zu bytes)\n"),
 272                         sizeof(*p));
 273
 274         p->nextbyaddr = hashtab->byaddr[byaddr];
 275         hashtab->byaddr[byaddr] = p;
 276         if (hashtab->last)
 277                 hashtab->last->nextbyorder = p;
 278         else
 279                 hashtab->first = p;
 280         p->nextbyorder = NULL;
 281         hashtab->last = p;
 282
 283         if (!(p->junkit = junk)) {
 284                 p->hashval = hash;
 285                 p->nextbyhash = hashtab->byhash[byhash];
 286                 hashtab->byhash[byhash] = p;
 287         }
 288         p->address = addr;
 289         p->inum = inum;
 290         p->seen = 0;
 291         p->name = xname;
 292
 293         return !dup;
 294 }
 295
 296 /*
 297  * checks to see if any data entries are not in the leaf blocks
 298  */
 299 static int
 300 dir_hash_unseen(
 301         dir_hash_tab_t  *hashtab)
 302 {
 303         int             i;
 304         dir_hash_ent_t  *p;
 305
 306         for (i = 0; i < hashtab->size; i++) {
 307                 for (p = hashtab->byaddr[i]; p; p = p->nextbyaddr) {
 308                         if (p->seen == 0)
 309                                 return 1;
 310                 }
 311         }
 312         return 0;
 313 }
 314
 315 static int
 316 dir_hash_check(
 317         dir_hash_tab_t  *hashtab,
 318         xfs_inode_t     *ip,
 319         int             seeval)
 320 {
 321         static char     *seevalstr[DIR_HASH_CK_TOTAL];
 322         static int      done;
 323
 324         if (!done) {
 325                 seevalstr[DIR_HASH_CK_OK] = _("ok");
 326                 seevalstr[DIR_HASH_CK_DUPLEAF] = _("duplicate leaf");
 327                 seevalstr[DIR_HASH_CK_BADHASH] = _("hash value mismatch");
 328                 seevalstr[DIR_HASH_CK_NODATA] = _("no data entry");
 329                 seevalstr[DIR_HASH_CK_NOLEAF] = _("no leaf entry");
 330                 seevalstr[DIR_HASH_CK_BADSTALE] = _("bad stale count");
 331                 done = 1;
 332         }
 333
 334         if (seeval == DIR_HASH_CK_OK && dir_hash_unseen(hashtab))
 335                 seeval = DIR_HASH_CK_NOLEAF;
 336         if (seeval == DIR_HASH_CK_OK)
 337                 return 0;
 338         do_warn(_("bad hash table for directory inode %" PRIu64 " (%s): "),
 339                 ip->i_ino, seevalstr[seeval]);
 340         if (!no_modify)
 341                 do_warn(_("rebuilding\n"));
 342         else
 343                 do_warn(_("would rebuild\n"));
 344         return 1;
 345 }
 346
 347 static void
 348 dir_hash_done(
 349         dir_hash_tab_t  *hashtab)
 350 {
 351         int             i;
 352         dir_hash_ent_t  *n;
 353         dir_hash_ent_t  *p;
 354
 355         for (i = 0; i < hashtab->size; i++) {
 356                 for (p = hashtab->byaddr[i]; p; p = n) {
 357                         n = p->nextbyaddr;
 358                         if (hashtab->names_duped)
 359                                 free((void *)p->name.name);
 360                         free(p);
 361                 }
 362         }
 363         free(hashtab);
 364 }
 365
 366 static dir_hash_tab_t *
 367 dir_hash_init(
 368         xfs_fsize_t     size)
 369 {
 370         dir_hash_tab_t  *hashtab;
 371         int             hsize;
 372
 373         hsize = size / (16 * 4);
 374         if (hsize > 65536)
 375                 hsize = 63336;
 376         else if (hsize < 16)
 377                 hsize = 16;
 378         if ((hashtab = calloc(DIR_HASH_TAB_SIZE(hsize), 1)) == NULL)
 379                 do_error(_("calloc failed in dir_hash_init\n"));
 380         hashtab->size = hsize;
 381         hashtab->byhash = (dir_hash_ent_t**)((char *)hashtab +
 382                 sizeof(dir_hash_tab_t));
 383         hashtab->byaddr = (dir_hash_ent_t**)((char *)hashtab +
 384                 sizeof(dir_hash_tab_t) + sizeof(dir_hash_ent_t*) * hsize);
 385         return hashtab;
 386 }
 387
 388 static int
 389 dir_hash_see(
 390         dir_hash_tab_t          *hashtab,
 391         xfs_dahash_t            hash,
 392         xfs_dir2_dataptr_t      addr)
 393 {
 394         int                     i;
 395         dir_hash_ent_t          *p;
 396
 397         i = DIR_HASH_FUNC(hashtab, addr);
 398         for (p = hashtab->byaddr[i]; p; p = p->nextbyaddr) {
 399                 if (p->address != addr)
 400                         continue;
 401                 if (p->seen)
 402                         return DIR_HASH_CK_DUPLEAF;
 403                 if (p->junkit == 0 && p->hashval != hash)
 404                         return DIR_HASH_CK_BADHASH;
 405                 p->seen = 1;
 406                 return DIR_HASH_CK_OK;
 407         }
 408         return DIR_HASH_CK_NODATA;
 409 }
 410
 411 static void
 412 dir_hash_update_ftype(
 413         dir_hash_tab_t          *hashtab,
 414         xfs_dir2_dataptr_t      addr,
 415         uint8_t                 ftype)
 416 {
 417         int                     i;
 418         dir_hash_ent_t          *p;
 419
 420         i = DIR_HASH_FUNC(hashtab, addr);
 421         for (p = hashtab->byaddr[i]; p; p = p->nextbyaddr) {
 422                 if (p->address != addr)
 423                         continue;
 424                 p->name.type = ftype;
 425         }
 426 }
 427
 428 /*
 429  * checks to make sure leafs match a data entry, and that the stale
 430  * count is valid.
 431  */
 432 static int
 433 dir_hash_see_all(
 434         dir_hash_tab_t          *hashtab,
 435         xfs_dir2_leaf_entry_t   *ents,
 436         int                     count,
 437         int                     stale)
 438 {
 439         int                     i;
 440         int                     j;
 441         int                     rval;
 442
 443         for (i = j = 0; i < count; i++) {
 444                 if (be32_to_cpu(ents[i].address) == XFS_DIR2_NULL_DATAPTR) {
 445                         j++;
 446                         continue;
 447                 }
 448                 rval = dir_hash_see(hashtab, be32_to_cpu(ents[i].hashval),
 449                                         be32_to_cpu(ents[i].address));
 450                 if (rval != DIR_HASH_CK_OK)
 451                         return rval;
 452         }
 453         return j == stale ? DIR_HASH_CK_OK : DIR_HASH_CK_BADSTALE;
 454 }
 455
 456 /*
 457  * Convert name pointers into locally allocated memory.
 458  * This must only be done after all the entries have been added.
 459  */
 460 static void
 461 dir_hash_dup_names(dir_hash_tab_t *hashtab)
 462 {
 463         unsigned char           *name;
 464         dir_hash_ent_t          *p;
 465
 466         if (hashtab->names_duped)
 467                 return;
 468
 469         for (p = hashtab->first; p; p = p->nextbyorder) {
 470                 name = malloc(p->name.len);
 471                 memcpy(name, p->name.name, p->name.len);
 472                 p->name.name = name;
 473         }
 474         hashtab->names_duped = 1;
 475 }
 476
 477 /*
 478  * Given a block number in a fork, return the next valid block number
 479  * (not a hole).
 480  * If this is the last block number then NULLFILEOFF is returned.
 481  *
 482  * This was originally in the kernel, but only used in xfs_repair.
 483  */
 484 static int
 485 bmap_next_offset(
 486         xfs_trans_t     *tp,                    /* transaction pointer */
 487         xfs_inode_t     *ip,                    /* incore inode */
 488         xfs_fileoff_t   *bnop,                  /* current block */
 489         int             whichfork)              /* data or attr fork */
 490 {
 491         xfs_fileoff_t   bno;                    /* current block */
 492         int             error;                  /* error return value */
 493         xfs_bmbt_irec_t got;                    /* current extent value */
 494         xfs_ifork_t     *ifp;                   /* inode fork pointer */
 495         struct xfs_iext_cursor  icur;
 496
 497         if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
 498             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
 499             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
 500                return EIO;
 501         if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
 502                 *bnop = NULLFILEOFF;
 503                 return 0;
 504         }
 505         ifp = XFS_IFORK_PTR(ip, whichfork);
 506         if (!(ifp->if_flags & XFS_IFEXTENTS) &&
 507             (error = -libxfs_iread_extents(tp, ip, whichfork)))
 508                 return error;
 509         bno = *bnop + 1;
 510         if (!libxfs_iext_lookup_extent(ip, ifp, bno, &icur, &got))
 511                 *bnop = NULLFILEOFF;
 512         else
 513                 *bnop = got.br_startoff < bno ? bno : got.br_startoff;
 514         return 0;
 515 }
 516
 517
 518 static void
 519 res_failed(
 520         int     err)
 521 {
 522         if (err == ENOSPC) {
 523                 do_error(_("ran out of disk space!\n"));
 524         } else
 525                 do_error(_("xfs_trans_reserve returned %d\n"), err);
 526 }
 527
 528 void
 529 mk_rbmino(xfs_mount_t *mp)
 530 {
 531         xfs_trans_t     *tp;
 532         xfs_inode_t     *ip;
 533         xfs_bmbt_irec_t *ep;
 534         xfs_fsblock_t   first;
 535         int             i;
 536         int             nmap;
 537         int             error;
 538         struct xfs_defer_ops    dfops;
 539         xfs_fileoff_t   bno;
 540         xfs_bmbt_irec_t map[XFS_BMAP_MAX_NMAP];
 541         int             vers;
 542         int             times;
 543         struct xfs_trans_res tres = {0};
 544
 545         /*
 546          * first set up inode
 547          */
 548         i = -libxfs_trans_alloc(mp, &tres, 10, 0, 0, &tp);
 549         if (i)
 550                 res_failed(i);
 551
 552         error = -libxfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, 0, &ip);
 553         if (error) {
 554                 do_error(
 555                 _("couldn't iget realtime bitmap inode -- error - %d\n"),
 556                         error);
 557         }
 558
 559         vers = xfs_sb_version_hascrc(&mp->m_sb) ? 3 : 2;
 560         memset(&ip->i_d, 0, sizeof(ip->i_d));
 561
 562         VFS_I(ip)->i_mode = S_IFREG;
 563         ip->i_d.di_version = vers;
 564         ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
 565         ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
 566
 567         set_nlink(VFS_I(ip), 1);        /* account for sb ptr */
 568
 569         times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
 570         if (ip->i_d.di_version == 3) {
 571                 VFS_I(ip)->i_version = 1;
 572                 ip->i_d.di_flags2 = 0;
 573                 times |= XFS_ICHGTIME_CREATE;
 574         }
 575         libxfs_trans_ichgtime(tp, ip, times);
 576
 577         /*
 578          * now the ifork
 579          */
 580         ip->i_df.if_flags = XFS_IFEXTENTS;
 581         ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
 582         ip->i_df.if_u1.if_root = NULL;
 583
 584         ip->i_d.di_size = mp->m_sb.sb_rbmblocks * mp->m_sb.sb_blocksize;
 585
 586         /*
 587          * commit changes
 588          */
 589         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 590         libxfs_trans_commit(tp);
 591
 592         /*
 593          * then allocate blocks for file and fill with zeroes (stolen
 594          * from mkfs)
 595          */
 596         error = -libxfs_trans_alloc(mp, &tres,
 597                 mp->m_sb.sb_rbmblocks + (XFS_BM_MAXLEVELS(mp,XFS_DATA_FORK) - 1),
 598                                    0, 0, &tp);
 599         if (error)
 600                 res_failed(error);
 601
 602         libxfs_trans_ijoin(tp, ip, 0);
 603         bno = 0;
 604         libxfs_defer_init(&dfops, &first);
 605         while (bno < mp->m_sb.sb_rbmblocks) {
 606                 nmap = XFS_BMAP_MAX_NMAP;
 607                 error = -libxfs_bmapi_write(tp, ip, bno,
 608                           (xfs_extlen_t)(mp->m_sb.sb_rbmblocks - bno),
 609                           0, &first, mp->m_sb.sb_rbmblocks,
 610                           map, &nmap, &dfops);
 611                 if (error) {
 612                         do_error(
 613                         _("couldn't allocate realtime bitmap, error = %d\n"),
 614                                 error);
 615                 }
 616                 for (i = 0, ep = map; i < nmap; i++, ep++) {
 617                         libxfs_device_zero(mp->m_ddev_targp,
 618                                 XFS_FSB_TO_DADDR(mp, ep->br_startblock),
 619                                 XFS_FSB_TO_BB(mp, ep->br_blockcount));
 620                         bno += ep->br_blockcount;
 621                 }
 622         }
 623         libxfs_defer_ijoin(&dfops, ip);
 624         error = -libxfs_defer_finish(&tp, &dfops);
 625         if (error) {
 626                 do_error(
 627                 _("allocation of the realtime bitmap failed, error = %d\n"),
 628                         error);
 629         }
 630         libxfs_trans_commit(tp);
 631         IRELE(ip);
 632 }
 633
 634 static int
 635 fill_rbmino(xfs_mount_t *mp)
 636 {
 637         xfs_buf_t       *bp;
 638         xfs_trans_t     *tp;
 639         xfs_inode_t     *ip;
 640         xfs_rtword_t    *bmp;
 641         xfs_fsblock_t   first;
 642         int             nmap;
 643         int             error;
 644         xfs_fileoff_t   bno;
 645         xfs_bmbt_irec_t map;
 646         struct xfs_trans_res tres = {0};
 647
 648         bmp = btmcompute;
 649         bno = 0;
 650
 651         error = -libxfs_trans_alloc(mp, &tres, 10, 0, 0, &tp);
 652         if (error)
 653                 res_failed(error);
 654
 655         error = -libxfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, 0, &ip);
 656         if (error) {
 657                 do_error(
 658                 _("couldn't iget realtime bitmap inode -- error - %d\n"),
 659                         error);
 660         }
 661
 662         first = NULLFSBLOCK;
 663         while (bno < mp->m_sb.sb_rbmblocks)  {
 664                 /*
 665                  * fill the file one block at a time
 666                  */
 667                 nmap = 1;
 668                 error = -libxfs_bmapi_write(tp, ip, bno, 1, 0,
 669                                         &first, 1, &map, &nmap, NULL);
 670                 if (error || nmap != 1) {
 671                         do_error(
 672         _("couldn't map realtime bitmap block %" PRIu64 ", error = %d\n"),
 673                                 bno, error);
 674                 }
 675
 676                 ASSERT(map.br_startblock != HOLESTARTBLOCK);
 677
 678                 error = -libxfs_trans_read_buf(
 679                                 mp, tp, mp->m_dev,
 680                                 XFS_FSB_TO_DADDR(mp, map.br_startblock),
 681                                 XFS_FSB_TO_BB(mp, 1), 1, &bp, NULL);
 682
 683                 if (error) {
 684                         do_warn(
 685 _("can't access block %" PRIu64 " (fsbno %" PRIu64 ") of realtime bitmap inode %" PRIu64 "\n"),
 686                                 bno, map.br_startblock, mp->m_sb.sb_rbmino);
 687                         return(1);
 688                 }
 689
 690                 memmove(XFS_BUF_PTR(bp), bmp, mp->m_sb.sb_blocksize);
 691
 692                 libxfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
 693
 694                 bmp = (xfs_rtword_t *)((intptr_t) bmp + mp->m_sb.sb_blocksize);
 695                 bno++;
 696         }
 697
 698         libxfs_trans_commit(tp);
 699         IRELE(ip);
 700         return(0);
 701 }
 702
 703 static int
 704 fill_rsumino(xfs_mount_t *mp)
 705 {
 706         xfs_buf_t       *bp;
 707         xfs_trans_t     *tp;
 708         xfs_inode_t     *ip;
 709         xfs_suminfo_t   *smp;
 710         xfs_fsblock_t   first;
 711         int             nmap;
 712         int             error;
 713         xfs_fileoff_t   bno;
 714         xfs_fileoff_t   end_bno;
 715         xfs_bmbt_irec_t map;
 716         struct xfs_trans_res tres = {0};
 717
 718         smp = sumcompute;
 719         bno = 0;
 720         end_bno = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
 721
 722         error = -libxfs_trans_alloc(mp, &tres, 10, 0, 0, &tp);
 723         if (error)
 724                 res_failed(error);
 725
 726         error = -libxfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, 0, &ip);
 727         if (error) {
 728                 do_error(
 729                 _("couldn't iget realtime summary inode -- error - %d\n"),
 730                         error);
 731         }
 732
 733         first = NULLFSBLOCK;
 734         while (bno < end_bno)  {
 735                 /*
 736                  * fill the file one block at a time
 737                  */
 738                 nmap = 1;
 739                 error = -libxfs_bmapi_write(tp, ip, bno, 1, 0,
 740                                         &first, 1, &map, &nmap, NULL);
 741                 if (error || nmap != 1) {
 742                         do_error(
 743         _("couldn't map realtime summary inode block %" PRIu64 ", error = %d\n"),
 744                                 bno, error);
 745                 }
 746
 747                 ASSERT(map.br_startblock != HOLESTARTBLOCK);
 748
 749                 error = -libxfs_trans_read_buf(
 750                                 mp, tp, mp->m_dev,
 751                                 XFS_FSB_TO_DADDR(mp, map.br_startblock),
 752                                 XFS_FSB_TO_BB(mp, 1), 1, &bp, NULL);
 753
 754                 if (error) {
 755                         do_warn(
 756 _("can't access block %" PRIu64 " (fsbno %" PRIu64 ") of realtime summary inode %" PRIu64 "\n"),
 757                                 bno, map.br_startblock, mp->m_sb.sb_rsumino);
 758                         IRELE(ip);
 759                         return(1);
 760                 }
 761
 762                 memmove(XFS_BUF_PTR(bp), smp, mp->m_sb.sb_blocksize);
 763
 764                 libxfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
 765
 766                 smp = (xfs_suminfo_t *)((intptr_t)smp + mp->m_sb.sb_blocksize);
 767                 bno++;
 768         }
 769
 770         libxfs_trans_commit(tp);
 771         IRELE(ip);
 772         return(0);
 773 }
 774
 775 static void
 776 mk_rsumino(xfs_mount_t *mp)
 777 {
 778         xfs_trans_t     *tp;
 779         xfs_inode_t     *ip;
 780         xfs_bmbt_irec_t *ep;
 781         xfs_fsblock_t   first;
 782         int             i;
 783         int             nmap;
 784         int             error;
 785         int             nsumblocks;
 786         struct xfs_defer_ops    dfops;
 787         xfs_fileoff_t   bno;
 788         xfs_bmbt_irec_t map[XFS_BMAP_MAX_NMAP];
 789         int             vers;
 790         int             times;
 791         struct xfs_trans_res tres = {0};
 792
 793         /*
 794          * first set up inode
 795          */
 796         i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 10, 0, 0, &tp);
 797         if (i)
 798                 res_failed(i);
 799
 800         error = -libxfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, 0, &ip);
 801         if (error) {
 802                 do_error(
 803                 _("couldn't iget realtime summary inode -- error - %d\n"),
 804                         error);
 805         }
 806
 807         vers = xfs_sb_version_hascrc(&mp->m_sb) ? 3 : 2;
 808         memset(&ip->i_d, 0, sizeof(ip->i_d));
 809
 810         VFS_I(ip)->i_mode = S_IFREG;
 811         ip->i_d.di_version = vers;
 812         ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
 813         ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
 814
 815         set_nlink(VFS_I(ip), 1);        /* account for sb ptr */
 816
 817         times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
 818         if (ip->i_d.di_version == 3) {
 819                 VFS_I(ip)->i_version = 1;
 820                 ip->i_d.di_flags2 = 0;
 821                 times |= XFS_ICHGTIME_CREATE;
 822         }
 823         libxfs_trans_ichgtime(tp, ip, times);
 824
 825         /*
 826          * now the ifork
 827          */
 828         ip->i_df.if_flags = XFS_IFEXTENTS;
 829         ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
 830         ip->i_df.if_u1.if_root = NULL;
 831
 832         ip->i_d.di_size = mp->m_rsumsize;
 833
 834         /*
 835          * commit changes
 836          */
 837         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 838         libxfs_trans_commit(tp);
 839
 840         /*
 841          * then allocate blocks for file and fill with zeroes (stolen
 842          * from mkfs)
 843          */
 844         libxfs_defer_init(&dfops, &first);
 845
 846         nsumblocks = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
 847         tres.tr_logres = BBTOB(128);
 848         tres.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT;
 849         tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
 850         error = -libxfs_trans_alloc(mp, &tres,
 851                 mp->m_sb.sb_rbmblocks + (XFS_BM_MAXLEVELS(mp,XFS_DATA_FORK) - 1),
 852                                     0, 0, &tp);
 853         if (error)
 854                 res_failed(error);
 855
 856         libxfs_trans_ijoin(tp, ip, 0);
 857         bno = 0;
 858         libxfs_defer_init(&dfops, &first);
 859         while (bno < nsumblocks) {
 860                 nmap = XFS_BMAP_MAX_NMAP;
 861                 error = -libxfs_bmapi_write(tp, ip, bno,
 862                           (xfs_extlen_t)(nsumblocks - bno),
 863                           0, &first, nsumblocks, map, &nmap, &dfops);
 864                 if (error) {
 865                         do_error(
 866                 _("couldn't allocate realtime summary inode, error = %d\n"),
 867                                 error);
 868                 }
 869                 for (i = 0, ep = map; i < nmap; i++, ep++) {
 870                         libxfs_device_zero(mp->m_ddev_targp,
 871                                       XFS_FSB_TO_DADDR(mp, ep->br_startblock),
 872                                       XFS_FSB_TO_BB(mp, ep->br_blockcount));
 873                         bno += ep->br_blockcount;
 874                 }
 875         }
 876         libxfs_defer_ijoin(&dfops, ip);
 877         error = -libxfs_defer_finish(&tp, &dfops);
 878         if (error) {
 879                 do_error(
 880         _("allocation of the realtime summary ino failed, error = %d\n"),
 881                         error);
 882         }
 883         libxfs_trans_commit(tp);
 884         IRELE(ip);
 885 }
 886
 887 /*
 888  * makes a new root directory.
 889  */
 890 static void
 891 mk_root_dir(xfs_mount_t *mp)
 892 {
 893         xfs_trans_t     *tp;
 894         xfs_inode_t     *ip;
 895         int             i;
 896         int             error;
 897         const mode_t    mode = 0755;
 898         ino_tree_node_t *irec;
 899         int             vers;
 900         int             times;
 901
 902         ip = NULL;
 903         i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 10, 0, 0, &tp);
 904         if (i)
 905                 res_failed(i);
 906
 907         error = -libxfs_trans_iget(mp, tp, mp->m_sb.sb_rootino, 0, 0, &ip);
 908         if (error) {
 909                 do_error(_("could not iget root inode -- error - %d\n"), error);
 910         }
 911
 912         /*
 913          * take care of the core -- initialization from xfs_ialloc()
 914          */
 915         vers = xfs_sb_version_hascrc(&mp->m_sb) ? 3 : 2;
 916         memset(&ip->i_d, 0, sizeof(ip->i_d));
 917
 918         VFS_I(ip)->i_mode = mode|S_IFDIR;
 919         ip->i_d.di_version = vers;
 920         ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
 921         ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
 922
 923         set_nlink(VFS_I(ip), 1);        /* account for . */
 924
 925         times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
 926         if (ip->i_d.di_version == 3) {
 927                 VFS_I(ip)->i_version = 1;
 928                 ip->i_d.di_flags2 = 0;
 929                 times |= XFS_ICHGTIME_CREATE;
 930         }
 931         libxfs_trans_ichgtime(tp, ip, times);
 932
 933         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 934
 935         /*
 936          * now the ifork
 937          */
 938         ip->i_df.if_flags = XFS_IFEXTENTS;
 939         ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
 940         ip->i_df.if_u1.if_root = NULL;
 941
 942
 943
 944         /*
 945          * initialize the directory
 946          */
 947         ip->d_ops = mp->m_dir_inode_ops;
 948         libxfs_dir_init(tp, ip, ip);
 949
 950         libxfs_trans_commit(tp);
 951         IRELE(ip);
 952
 953         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
 954                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
 955         set_inode_isadir(irec, XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino) -
 956                                 irec->ino_startnum);
 957 }
 958
 959 /*
 960  * orphanage name == lost+found
 961  */
 962 static xfs_ino_t
 963 mk_orphanage(xfs_mount_t *mp)
 964 {
 965         xfs_ino_t       ino;
 966         xfs_trans_t     *tp;
 967         xfs_inode_t     *ip;
 968         xfs_inode_t     *pip;
 969         xfs_fsblock_t   first;
 970         ino_tree_node_t *irec;
 971         int             ino_offset = 0;
 972         int             i;
 973         int             error;
 974         struct xfs_defer_ops    dfops;
 975         const int       mode = 0755;
 976         int             nres;
 977         struct xfs_name xname;
 978
 979         /*
 980          * check for an existing lost+found first, if it exists, return
 981          * its inode. Otherwise, we can create it. Bad lost+found inodes
 982          * would have been cleared in phase3 and phase4.
 983          */
 984
 985         i = -libxfs_iget(mp, NULL, mp->m_sb.sb_rootino, 0, &pip,
 986                         &xfs_default_ifork_ops);
 987         if (i)
 988                 do_error(_("%d - couldn't iget root inode to obtain %s\n"),
 989                         i, ORPHANAGE);
 990
 991         xname.name = (unsigned char *)ORPHANAGE;
 992         xname.len = strlen(ORPHANAGE);
 993         xname.type = XFS_DIR3_FT_DIR;
 994
 995         if (libxfs_dir_lookup(NULL, pip, &xname, &ino, NULL) == 0)
 996                 return ino;
 997
 998         /*
 999          * could not be found, create it
1000          */
1001         libxfs_defer_init(&dfops, &first);
1002         nres = XFS_MKDIR_SPACE_RES(mp, xname.len);
1003         i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir, nres, 0, 0, &tp);
1004         if (i)
1005                 res_failed(i);
1006
1007         /*
1008          * use iget/ijoin instead of trans_iget because the ialloc
1009          * wrapper can commit the transaction and start a new one
1010          */
1011 /*      i = -libxfs_iget(mp, NULL, mp->m_sb.sb_rootino, 0, &pip,
1012                         &xfs_default_ifork_ops);
1013         if (i)
1014                 do_error(_("%d - couldn't iget root inode to make %s\n"),
1015                         i, ORPHANAGE);*/
1016
1017         error = -libxfs_inode_alloc(&tp, pip, mode|S_IFDIR,
1018                                         1, 0, &zerocr, &zerofsx, &ip);
1019         if (error) {
1020                 do_error(_("%s inode allocation failed %d\n"),
1021                         ORPHANAGE, error);
1022         }
1023         inc_nlink(VFS_I(ip));           /* account for . */
1024         ino = ip->i_ino;
1025
1026         irec = find_inode_rec(mp,
1027                         XFS_INO_TO_AGNO(mp, ino),
1028                         XFS_INO_TO_AGINO(mp, ino));
1029
1030         if (irec == NULL) {
1031                 /*
1032                  * This inode is allocated from a newly created inode
1033                  * chunk and therefore did not exist when inode chunks
1034                  * were processed in phase3. Add this group of inodes to
1035                  * the entry avl tree as if they were discovered in phase3.
1036                  */
1037                 irec = set_inode_free_alloc(mp, XFS_INO_TO_AGNO(mp, ino),
1038                                             XFS_INO_TO_AGINO(mp, ino));
1039                 alloc_ex_data(irec);
1040
1041                 for (i = 0; i < XFS_INODES_PER_CHUNK; i++)
1042                         set_inode_free(irec, i);
1043         }
1044
1045         ino_offset = get_inode_offset(mp, ino, irec);
1046
1047         /*
1048          * Mark the inode allocated to lost+found as used in the AVL tree
1049          * so it is not skipped in phase 7
1050          */
1051         set_inode_used(irec, ino_offset);
1052         add_inode_ref(irec, ino_offset);
1053
1054         /*
1055          * now that we know the transaction will stay around,
1056          * add the root inode to it
1057          */
1058         libxfs_trans_ijoin(tp, pip, 0);
1059
1060         /*
1061          * create the actual entry
1062          */
1063         error = -libxfs_dir_createname(tp, pip, &xname, ip->i_ino, &first,
1064                                         &dfops, nres);
1065         if (error)
1066                 do_error(
1067                 _("can't make %s, createname error %d\n"),
1068                         ORPHANAGE, error);
1069
1070         /*
1071          * bump up the link count in the root directory to account
1072          * for .. in the new directory
1073          */
1074         inc_nlink(VFS_I(pip));
1075         add_inode_ref(find_inode_rec(mp,
1076                                 XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
1077                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino)), 0);
1078
1079
1080
1081         libxfs_trans_log_inode(tp, pip, XFS_ILOG_CORE);
1082         libxfs_dir_init(tp, ip, pip);
1083         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1084
1085         libxfs_defer_ijoin(&dfops, ip);
1086         error = -libxfs_defer_finish(&tp, &dfops);
1087         if (error) {
1088                 do_error(_("%s directory creation failed -- bmapf error %d\n"),
1089                         ORPHANAGE, error);
1090         }
1091
1092
1093         libxfs_trans_commit(tp);
1094         IRELE(ip);
1095         IRELE(pip);
1096         add_inode_reached(irec,ino_offset);
1097
1098         return(ino);
1099 }
1100
1101 /*
1102  * move a file to the orphange.
1103  */
1104 static void
1105 mv_orphanage(
1106         xfs_mount_t             *mp,
1107         xfs_ino_t               ino,            /* inode # to be moved */
1108         int                     isa_dir)        /* 1 if inode is a directory */
1109 {
1110         xfs_inode_t             *orphanage_ip;
1111         xfs_ino_t               entry_ino_num;
1112         xfs_inode_t             *ino_p;
1113         xfs_trans_t             *tp;
1114         xfs_fsblock_t           first;
1115         struct xfs_defer_ops            dfops;
1116         int                     err;
1117         unsigned char           fname[MAXPATHLEN + 1];
1118         int                     nres;
1119         int                     incr;
1120         ino_tree_node_t         *irec;
1121         int                     ino_offset = 0;
1122         struct xfs_name         xname;
1123
1124         xname.name = fname;
1125         xname.len = snprintf((char *)fname, sizeof(fname), "%llu",
1126                                 (unsigned long long)ino);
1127
1128         err = -libxfs_iget(mp, NULL, orphanage_ino, 0, &orphanage_ip,
1129                         &xfs_default_ifork_ops);
1130         if (err)
1131                 do_error(_("%d - couldn't iget orphanage inode\n"), err);
1132         /*
1133          * Make sure the filename is unique in the lost+found
1134          */
1135         incr = 0;
1136         while (libxfs_dir_lookup(NULL, orphanage_ip, &xname, &entry_ino_num,
1137                                                                 NULL) == 0)
1138                 xname.len = snprintf((char *)fname, sizeof(fname), "%llu.%d",
1139                                         (unsigned long long)ino, ++incr);
1140
1141         /* Orphans may not have a proper parent, so use custom ops here */
1142         err = -libxfs_iget(mp, NULL, ino, 0, &ino_p, &phase6_ifork_ops);
1143         if (err)
1144                 do_error(_("%d - couldn't iget disconnected inode\n"), err);
1145
1146         xname.type = libxfs_mode_to_ftype(VFS_I(ino_p)->i_mode);
1147
1148         if (isa_dir)  {
1149                 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, orphanage_ino),
1150                                 XFS_INO_TO_AGINO(mp, orphanage_ino));
1151                 if (irec)
1152                         ino_offset = XFS_INO_TO_AGINO(mp, orphanage_ino) -
1153                                         irec->ino_startnum;
1154                 nres = XFS_DIRENTER_SPACE_RES(mp, fnamelen) +
1155                        XFS_DIRENTER_SPACE_RES(mp, 2);
1156                 err = -libxfs_dir_lookup(NULL, ino_p, &xfs_name_dotdot,
1157                                         &entry_ino_num, NULL);
1158                 if (err) {
1159                         ASSERT(err == ENOENT);
1160
1161                         err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_rename,
1162                                                   nres, 0, 0, &tp);
1163                         if (err)
1164                                 do_error(
1165         _("space reservation failed (%d), filesystem may be out of space\n"),
1166                                         err);
1167
1168                         libxfs_trans_ijoin(tp, orphanage_ip, 0);
1169                         libxfs_trans_ijoin(tp, ino_p, 0);
1170
1171                         libxfs_defer_init(&dfops, &first);
1172                         err = -libxfs_dir_createname(tp, orphanage_ip, &xname,
1173                                                 ino, &first, &dfops, nres);
1174                         if (err)
1175                                 do_error(
1176         _("name create failed in %s (%d), filesystem may be out of space\n"),
1177                                         ORPHANAGE, err);
1178
1179                         if (irec)
1180                                 add_inode_ref(irec, ino_offset);
1181                         else
1182                                 inc_nlink(VFS_I(orphanage_ip));
1183                         libxfs_trans_log_inode(tp, orphanage_ip, XFS_ILOG_CORE);
1184
1185                         err = -libxfs_dir_createname(tp, ino_p, &xfs_name_dotdot,
1186                                         orphanage_ino, &first, &dfops, nres);
1187                         if (err)
1188                                 do_error(
1189         _("creation of .. entry failed (%d), filesystem may be out of space\n"),
1190                                         err);
1191
1192                         inc_nlink(VFS_I(ino_p));
1193                         libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE);
1194
1195                         libxfs_defer_ijoin(&dfops, ino_p);
1196                         err = -libxfs_defer_finish(&tp, &dfops);
1197                         if (err)
1198                                 do_error(
1199         _("bmap finish failed (err - %d), filesystem may be out of space\n"),
1200                                         err);
1201
1202                         libxfs_trans_commit(tp);
1203                 } else  {
1204                         err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_rename,
1205                                                   nres, 0, 0, &tp);
1206                         if (err)
1207                                 do_error(
1208         _("space reservation failed (%d), filesystem may be out of space\n"),
1209                                         err);
1210
1211                         libxfs_trans_ijoin(tp, orphanage_ip, 0);
1212                         libxfs_trans_ijoin(tp, ino_p, 0);
1213
1214                         libxfs_defer_init(&dfops, &first);
1215
1216                         err = -libxfs_dir_createname(tp, orphanage_ip, &xname,
1217                                                 ino, &first, &dfops, nres);
1218                         if (err)
1219                                 do_error(
1220         _("name create failed in %s (%d), filesystem may be out of space\n"),
1221                                         ORPHANAGE, err);
1222
1223                         if (irec)
1224                                 add_inode_ref(irec, ino_offset);
1225                         else
1226                                 inc_nlink(VFS_I(orphanage_ip));
1227                         libxfs_trans_log_inode(tp, orphanage_ip, XFS_ILOG_CORE);
1228
1229                         /*
1230                          * don't replace .. value if it already points
1231                          * to us.  that'll pop a libxfs/kernel ASSERT.
1232                          */
1233                         if (entry_ino_num != orphanage_ino)  {
1234                                 err = -libxfs_dir_replace(tp, ino_p,
1235                                                 &xfs_name_dotdot, orphanage_ino,
1236                                                 &first, &dfops, nres);
1237                                 if (err)
1238                                         do_error(
1239         _("name replace op failed (%d), filesystem may be out of space\n"),
1240                                                 err);
1241                         }
1242
1243                         libxfs_defer_ijoin(&dfops, ino_p);
1244                         err = -libxfs_defer_finish(&tp, &dfops);
1245                         if (err)
1246                                 do_error(
1247         _("bmap finish failed (%d), filesystem may be out of space\n"),
1248                                         err);
1249
1250                         libxfs_trans_commit(tp);
1251                 }
1252
1253         } else  {
1254                 /*
1255                  * use the remove log reservation as that's
1256                  * more accurate.  we're only creating the
1257                  * links, we're not doing the inode allocation
1258                  * also accounted for in the create
1259                  */
1260                 nres = XFS_DIRENTER_SPACE_RES(mp, xname.len);
1261                 err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove,
1262                                           nres, 0, 0, &tp);
1263                 if (err)
1264                         do_error(
1265         _("space reservation failed (%d), filesystem may be out of space\n"),
1266                                 err);
1267
1268                 libxfs_trans_ijoin(tp, orphanage_ip, 0);
1269                 libxfs_trans_ijoin(tp, ino_p, 0);
1270
1271                 libxfs_defer_init(&dfops, &first);
1272                 err = -libxfs_dir_createname(tp, orphanage_ip, &xname, ino,
1273                                                 &first, &dfops, nres);
1274                 if (err)
1275                         do_error(
1276         _("name create failed in %s (%d), filesystem may be out of space\n"),
1277                                 ORPHANAGE, err);
1278                 ASSERT(err == 0);
1279
1280                 set_nlink(VFS_I(ino_p), 1);
1281                 libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE);
1282
1283                 libxfs_defer_ijoin(&dfops, ino_p);
1284                 err = -libxfs_defer_finish(&tp, &dfops);
1285                 if (err)
1286                         do_error(
1287         _("bmap finish failed (%d), filesystem may be out of space\n"),
1288                                 err);
1289
1290                 libxfs_trans_commit(tp);
1291         }
1292         IRELE(ino_p);
1293         IRELE(orphanage_ip);
1294 }
1295
1296 static int
1297 entry_junked(
1298         const char      *msg,
1299         const char      *iname,
1300         xfs_ino_t       ino1,
1301         xfs_ino_t       ino2)
1302 {
1303         do_warn(msg, iname, ino1, ino2);
1304         if (!no_modify) {
1305                 if (verbose)
1306                         do_warn(_(", marking entry to be junked\n"));
1307                 else
1308                         do_warn("\n");
1309         } else
1310                 do_warn(_(", would junk entry\n"));
1311         return !no_modify;
1312 }
1313
1314 /* Find and invalidate all the directory's buffers. */
1315 static int
1316 dir_binval(
1317         struct xfs_trans        *tp,
1318         struct xfs_inode        *ip,
1319         int                     whichfork)
1320 {
1321         struct xfs_iext_cursor  icur;
1322         struct xfs_bmbt_irec    rec;
1323         struct xfs_ifork        *ifp;
1324         struct xfs_da_geometry  *geo;
1325         struct xfs_buf          *bp;
1326         xfs_dablk_t             dabno, end_dabno;
1327         int                     error = 0;
1328
1329         if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
1330             ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
1331                 return 0;
1332
1333         geo = tp->t_mountp->m_dir_geo;
1334         ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1335         for_each_xfs_iext(ifp, &icur, &rec) {
1336                 dabno = xfs_dir2_db_to_da(geo, rec.br_startoff +
1337                                 geo->fsbcount - 1);
1338                 end_dabno = xfs_dir2_db_to_da(geo, rec.br_startoff +
1339                                 rec.br_blockcount);
1340                 for (; dabno <= end_dabno; dabno += geo->fsbcount) {
1341                         bp = NULL;
1342                         error = -libxfs_da_get_buf(tp, ip, dabno, -2, &bp,
1343                                         whichfork);
1344                         if (error)
1345                                 return error;
1346                         if (!bp)
1347                                 continue;
1348                         libxfs_trans_binval(tp, bp);
1349                         libxfs_trans_brelse(tp, bp);
1350                 }
1351         }
1352
1353         return error;
1354 }
1355
1356 /*
1357  * Unexpected failure during the rebuild will leave the entries in
1358  * lost+found on the next run
1359  */
1360
1361 static void
1362 longform_dir2_rebuild(
1363         xfs_mount_t             *mp,
1364         xfs_ino_t               ino,
1365         xfs_inode_t             *ip,
1366         ino_tree_node_t         *irec,
1367         int                     ino_offset,
1368         dir_hash_tab_t          *hashtab)
1369 {
1370         int                     error;
1371         int                     nres;
1372         xfs_trans_t             *tp;
1373         xfs_fileoff_t           lastblock;
1374         xfs_fsblock_t           firstblock;
1375         struct xfs_defer_ops            dfops;
1376         xfs_inode_t             pip;
1377         dir_hash_ent_t          *p;
1378         int                     done;
1379
1380         /*
1381          * trash directory completely and rebuild from scratch using the
1382          * name/inode pairs in the hash table
1383          */
1384
1385         do_warn(_("rebuilding directory inode %" PRIu64 "\n"), ino);
1386
1387         /*
1388          * first attempt to locate the parent inode, if it can't be
1389          * found, set it to the root inode and it'll be moved to the
1390          * orphanage later (the inode number here needs to be valid
1391          * for the libxfs_dir_init() call).
1392          */
1393         pip.i_ino = get_inode_parent(irec, ino_offset);
1394         if (pip.i_ino == NULLFSINO ||
1395             libxfs_dir_ino_validate(mp, pip.i_ino))
1396                 pip.i_ino = mp->m_sb.sb_rootino;
1397
1398         libxfs_defer_init(&dfops, &firstblock);
1399
1400         nres = XFS_REMOVE_SPACE_RES(mp);
1401         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, nres, 0, 0, &tp);
1402         if (error)
1403                 res_failed(error);
1404         libxfs_trans_ijoin(tp, ip, 0);
1405
1406         error = dir_binval(tp, ip, XFS_DATA_FORK);
1407         if (error)
1408                 res_failed(error);
1409
1410         if ((error = -libxfs_bmap_last_offset(ip, &lastblock, XFS_DATA_FORK)))
1411                 do_error(_("xfs_bmap_last_offset failed -- error - %d\n"),
1412                         error);
1413
1414         /* free all data, leaf, node and freespace blocks */
1415         error = -libxfs_bunmapi(tp, ip, 0, lastblock, XFS_BMAPI_METADATA, 0,
1416                                 &firstblock, &dfops, &done);
1417         if (error) {
1418                 do_warn(_("xfs_bunmapi failed -- error - %d\n"), error);
1419                 goto out_bmap_cancel;
1420         }
1421
1422         ASSERT(done);
1423
1424         error = -libxfs_dir_init(tp, ip, &pip);
1425         if (error) {
1426                 do_warn(_("xfs_dir_init failed -- error - %d\n"), error);
1427                 goto out_bmap_cancel;
1428         }
1429
1430         libxfs_defer_ijoin(&dfops, ip);
1431         error = -libxfs_defer_finish(&tp, &dfops);
1432
1433         libxfs_trans_commit(tp);
1434
1435         if (ino == mp->m_sb.sb_rootino)
1436                 need_root_dotdot = 0;
1437
1438         /* go through the hash list and re-add the inodes */
1439
1440         for (p = hashtab->first; p; p = p->nextbyorder) {
1441
1442                 if (p->name.name[0] == '/' || (p->name.name[0] == '.' &&
1443                                 (p->name.len == 1 || (p->name.len == 2 &&
1444                                                 p->name.name[1] == '.'))))
1445                         continue;
1446
1447                 nres = XFS_CREATE_SPACE_RES(mp, p->name.len);
1448                 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_create,
1449                                             nres, 0, 0, &tp);
1450                 if (error)
1451                         res_failed(error);
1452
1453                 libxfs_trans_ijoin(tp, ip, 0);
1454
1455                 libxfs_defer_init(&dfops, &firstblock);
1456                 error = -libxfs_dir_createname(tp, ip, &p->name, p->inum,
1457                                                 &firstblock, &dfops, nres);
1458                 if (error) {
1459                         do_warn(
1460 _("name create failed in ino %" PRIu64 " (%d), filesystem may be out of space\n"),
1461                                 ino, error);
1462                         goto out_bmap_cancel;
1463                 }
1464
1465                 libxfs_defer_ijoin(&dfops, ip);
1466                 error = -libxfs_defer_finish(&tp, &dfops);
1467                 if (error) {
1468                         do_warn(
1469         _("bmap finish failed (%d), filesystem may be out of space\n"),
1470                                 error);
1471                         goto out_bmap_cancel;
1472                 }
1473
1474                 libxfs_trans_commit(tp);
1475         }
1476
1477         return;
1478
1479 out_bmap_cancel:
1480         libxfs_defer_cancel(&dfops);
1481         libxfs_trans_cancel(tp);
1482         return;
1483 }
1484
1485
1486 /*
1487  * Kill a block in a version 2 inode.
1488  * Makes its own transaction.
1489  */
1490 static void
1491 dir2_kill_block(
1492         xfs_mount_t     *mp,
1493         xfs_inode_t     *ip,
1494         xfs_dablk_t     da_bno,
1495         struct xfs_buf  *bp)
1496 {
1497         xfs_da_args_t   args;
1498         int             error;
1499         xfs_fsblock_t   firstblock;
1500         struct xfs_defer_ops    dfops;
1501         int             nres;
1502         xfs_trans_t     *tp;
1503
1504         nres = XFS_REMOVE_SPACE_RES(mp);
1505         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, nres, 0, 0, &tp);
1506         if (error)
1507                 res_failed(error);
1508         libxfs_trans_ijoin(tp, ip, 0);
1509         libxfs_trans_bjoin(tp, bp);
1510         memset(&args, 0, sizeof(args));
1511         libxfs_defer_init(&dfops, &firstblock);
1512         args.dp = ip;
1513         args.trans = tp;
1514         args.firstblock = &firstblock;
1515         args.dfops = &dfops;
1516         args.whichfork = XFS_DATA_FORK;
1517         args.geo = mp->m_dir_geo;
1518         if (da_bno >= mp->m_dir_geo->leafblk && da_bno < mp->m_dir_geo->freeblk)
1519                 error = -libxfs_da_shrink_inode(&args, da_bno, bp);
1520         else
1521                 error = -libxfs_dir2_shrink_inode(&args,
1522                                 xfs_dir2_da_to_db(mp->m_dir_geo, da_bno), bp);
1523         if (error)
1524                 do_error(_("shrink_inode failed inode %" PRIu64 " block %u\n"),
1525                         ip->i_ino, da_bno);
1526         libxfs_defer_ijoin(&dfops, ip);
1527         libxfs_defer_finish(&tp, &dfops);
1528         libxfs_trans_commit(tp);
1529 }
1530
1531 /*
1532  * process a data block, also checks for .. entry
1533  * and corrects it to match what we think .. should be
1534  */
1535 static void
1536 longform_dir2_entry_check_data(
1537         xfs_mount_t             *mp,
1538         xfs_inode_t             *ip,
1539         int                     *num_illegal,
1540         int                     *need_dot,
1541         ino_tree_node_t         *current_irec,
1542         int                     current_ino_offset,
1543         struct xfs_buf          **bpp,
1544         dir_hash_tab_t          *hashtab,
1545         freetab_t               **freetabp,
1546         xfs_dablk_t             da_bno,
1547         int                     isblock)
1548 {
1549         xfs_dir2_dataptr_t      addr;
1550         xfs_dir2_leaf_entry_t   *blp;
1551         struct xfs_buf          *bp;
1552         xfs_dir2_block_tail_t   *btp;
1553         struct xfs_dir2_data_hdr *d;
1554         xfs_dir2_db_t           db;
1555         xfs_dir2_data_entry_t   *dep;
1556         xfs_dir2_data_unused_t  *dup;
1557         struct xfs_dir2_data_free *bf;
1558         char                    *endptr;
1559         int                     error;
1560         xfs_fsblock_t           firstblock;
1561         struct xfs_defer_ops            dfops;
1562         char                    fname[MAXNAMELEN + 1];
1563         freetab_t               *freetab;
1564         int                     i;
1565         int                     ino_offset;
1566         xfs_ino_t               inum;
1567         ino_tree_node_t         *irec;
1568         int                     junkit;
1569         int                     lastfree;
1570         int                     len;
1571         int                     nbad;
1572         int                     needlog;
1573         int                     needscan;
1574         xfs_ino_t               parent;
1575         char                    *ptr;
1576         xfs_trans_t             *tp;
1577         int                     wantmagic;
1578         struct xfs_da_args      da = {
1579                 .dp = ip,
1580                 .geo = mp->m_dir_geo,
1581         };
1582
1583
1584         bp = *bpp;
1585         d = bp->b_addr;
1586         ptr = (char *)M_DIROPS(mp)->data_entry_p(d);
1587         nbad = 0;
1588         needscan = needlog = 0;
1589         junkit = 0;
1590         freetab = *freetabp;
1591         if (isblock) {
1592                 btp = xfs_dir2_block_tail_p(mp->m_dir_geo, d);
1593                 blp = xfs_dir2_block_leaf_p(btp);
1594                 endptr = (char *)blp;
1595                 if (endptr > (char *)btp)
1596                         endptr = (char *)btp;
1597                 if (xfs_sb_version_hascrc(&mp->m_sb))
1598                         wantmagic = XFS_DIR3_BLOCK_MAGIC;
1599                 else
1600                         wantmagic = XFS_DIR2_BLOCK_MAGIC;
1601         } else {
1602                 endptr = (char *)d + mp->m_dir_geo->blksize;
1603                 if (xfs_sb_version_hascrc(&mp->m_sb))
1604                         wantmagic = XFS_DIR3_DATA_MAGIC;
1605                 else
1606                         wantmagic = XFS_DIR2_DATA_MAGIC;
1607         }
1608         db = xfs_dir2_da_to_db(mp->m_dir_geo, da_bno);
1609
1610         /* check for data block beyond expected end */
1611         if (freetab->naents <= db) {
1612                 struct freetab_ent e;
1613
1614                 *freetabp = freetab = realloc(freetab, FREETAB_SIZE(db + 1));
1615                 if (!freetab) {
1616                         do_error(_("realloc failed in %s (%zu bytes)\n"),
1617                                 __func__, FREETAB_SIZE(db + 1));
1618                 }
1619                 e.v = NULLDATAOFF;
1620                 e.s = 0;
1621                 for (i = freetab->naents; i < db; i++)
1622                         freetab->ents[i] = e;
1623                 freetab->naents = db + 1;
1624         }
1625
1626         /* check the data block */
1627         while (ptr < endptr) {
1628
1629                 /* check for freespace */
1630                 dup = (xfs_dir2_data_unused_t *)ptr;
1631                 if (XFS_DIR2_DATA_FREE_TAG == be16_to_cpu(dup->freetag)) {
1632
1633                         /* check for invalid freespace length */
1634                         if (ptr + be16_to_cpu(dup->length) > endptr ||
1635                                         be16_to_cpu(dup->length) == 0 ||
1636                                         (be16_to_cpu(dup->length) &
1637                                                 (XFS_DIR2_DATA_ALIGN - 1)))
1638                                 break;
1639
1640                         /* check for invalid tag */
1641                         if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) !=
1642                                                 (char *)dup - (char *)d)
1643                                 break;
1644
1645                         /* check for block with no data entries */
1646                         if ((ptr == (char *)M_DIROPS(mp)->data_entry_p(d)) &&
1647                             (ptr + be16_to_cpu(dup->length) >= endptr)) {
1648                                 junkit = 1;
1649                                 *num_illegal += 1;
1650                                 break;
1651                         }
1652
1653                         /* continue at the end of the freespace */
1654                         ptr += be16_to_cpu(dup->length);
1655                         if (ptr >= endptr)
1656                                 break;
1657                 }
1658
1659                 /* validate data entry size */
1660                 dep = (xfs_dir2_data_entry_t *)ptr;
1661                 if (ptr + M_DIROPS(mp)->data_entsize(dep->namelen) > endptr)
1662                         break;
1663                 if (be16_to_cpu(*M_DIROPS(mp)->data_entry_tag_p(dep)) !=
1664                                                 (char *)dep - (char *)d)
1665                         break;
1666                 ptr += M_DIROPS(mp)->data_entsize(dep->namelen);
1667         }
1668
1669         /* did we find an empty or corrupt block? */
1670         if (ptr != endptr) {
1671                 if (junkit) {
1672                         do_warn(
1673         _("empty data block %u in directory inode %" PRIu64 ": "),
1674                                 da_bno, ip->i_ino);
1675                 } else {
1676                         do_warn(_
1677         ("corrupt block %u in directory inode %" PRIu64 ": "),
1678                                 da_bno, ip->i_ino);
1679                 }
1680                 if (!no_modify) {
1681                         do_warn(_("junking block\n"));
1682                         dir2_kill_block(mp, ip, da_bno, bp);
1683                 } else {
1684                         do_warn(_("would junk block\n"));
1685                         libxfs_putbuf(bp);
1686                 }
1687                 freetab->ents[db].v = NULLDATAOFF;
1688                 *bpp = NULL;
1689                 return;
1690         }
1691
1692         /* update number of data blocks processed */
1693         if (freetab->nents < db + 1)
1694                 freetab->nents = db + 1;
1695
1696         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, 0, 0, 0, &tp);
1697         if (error)
1698                 res_failed(error);
1699         da.trans = tp;
1700         libxfs_trans_ijoin(tp, ip, 0);
1701         libxfs_trans_bjoin(tp, bp);
1702         libxfs_trans_bhold(tp, bp);
1703         libxfs_defer_init(&dfops, &firstblock);
1704         if (be32_to_cpu(d->magic) != wantmagic) {
1705                 do_warn(
1706         _("bad directory block magic # %#x for directory inode %" PRIu64 " block %d: "),
1707                         be32_to_cpu(d->magic), ip->i_ino, da_bno);
1708                 if (!no_modify) {
1709                         do_warn(_("fixing magic # to %#x\n"), wantmagic);
1710                         d->magic = cpu_to_be32(wantmagic);
1711                         needlog = 1;
1712                 } else
1713                         do_warn(_("would fix magic # to %#x\n"), wantmagic);
1714         }
1715         lastfree = 0;
1716         ptr = (char *)M_DIROPS(mp)->data_entry_p(d);
1717         /*
1718          * look at each entry.  reference inode pointed to by each
1719          * entry in the incore inode tree.
1720          * if not a directory, set reached flag, increment link count
1721          * if a directory and reached, mark entry as to be deleted.
1722          * if a directory, check to see if recorded parent
1723          *      matches current inode #,
1724          *      if so, then set reached flag, increment link count
1725          *              of current and child dir inodes, push the child
1726          *              directory inode onto the directory stack.
1727          *      if current inode != parent, then mark entry to be deleted.
1728          */
1729         while (ptr < endptr) {
1730                 dup = (xfs_dir2_data_unused_t *)ptr;
1731                 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
1732                         if (lastfree) {
1733                                 do_warn(
1734         _("directory inode %" PRIu64 " block %u has consecutive free entries: "),
1735                                         ip->i_ino, da_bno);
1736                                 if (!no_modify) {
1737
1738                                         do_warn(_("joining together\n"));
1739                                         len = be16_to_cpu(dup->length);
1740                                         libxfs_dir2_data_use_free(&da, bp, dup,
1741                                                 ptr - (char *)d, len, &needlog,
1742                                                 &needscan);
1743                                         libxfs_dir2_data_make_free(&da, bp,
1744                                                 ptr - (char *)d, len, &needlog,
1745                                                 &needscan);
1746                                 } else
1747                                         do_warn(_("would join together\n"));
1748                         }
1749                         ptr += be16_to_cpu(dup->length);
1750                         lastfree = 1;
1751                         continue;
1752                 }
1753                 addr = xfs_dir2_db_off_to_dataptr(mp->m_dir_geo, db,
1754                                                   ptr - (char *)d);
1755                 dep = (xfs_dir2_data_entry_t *)ptr;
1756                 ptr += M_DIROPS(mp)->data_entsize(dep->namelen);
1757                 inum = be64_to_cpu(dep->inumber);
1758                 lastfree = 0;
1759                 /*
1760                  * skip bogus entries (leading '/').  they'll be deleted
1761                  * later.  must still log it, else we leak references to
1762                  * buffers.
1763                  */
1764                 if (dep->name[0] == '/')  {
1765                         nbad++;
1766                         if (!no_modify)
1767                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1768                         continue;
1769                 }
1770
1771                 memmove(fname, dep->name, dep->namelen);
1772                 fname[dep->namelen] = '\0';
1773                 ASSERT(inum != NULLFSINO);
1774
1775                 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, inum),
1776                                         XFS_INO_TO_AGINO(mp, inum));
1777                 if (irec == NULL)  {
1778                         nbad++;
1779                         if (entry_junked(
1780         _("entry \"%s\" in directory inode %" PRIu64 " points to non-existent inode %" PRIu64 ""),
1781                                         fname, ip->i_ino, inum)) {
1782                                 dep->name[0] = '/';
1783                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1784                         }
1785                         continue;
1786                 }
1787                 ino_offset = XFS_INO_TO_AGINO(mp, inum) - irec->ino_startnum;
1788
1789                 /*
1790                  * if it's a free inode, blow out the entry.
1791                  * by now, any inode that we think is free
1792                  * really is free.
1793                  */
1794                 if (is_inode_free(irec, ino_offset))  {
1795                         nbad++;
1796                         if (entry_junked(
1797         _("entry \"%s\" in directory inode %" PRIu64 " points to free inode %" PRIu64),
1798                                         fname, ip->i_ino, inum)) {
1799                                 dep->name[0] = '/';
1800                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1801                         }
1802                         continue;
1803                 }
1804
1805                 /*
1806                  * check if this inode is lost+found dir in the root
1807                  */
1808                 if (inum == mp->m_sb.sb_rootino && strcmp(fname, ORPHANAGE) == 0) {
1809                         /*
1810                          * if it's not a directory, trash it
1811                          */
1812                         if (!inode_isadir(irec, ino_offset)) {
1813                                 nbad++;
1814                                 if (entry_junked(
1815         _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"),
1816                                                 ORPHANAGE, inum, ip->i_ino)) {
1817                                         dep->name[0] = '/';
1818                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1819                                 }
1820                                 continue;
1821                         }
1822                         /*
1823                          * if this is a dup, it will be picked up below,
1824                          * otherwise, mark it as the orphanage for later.
1825                          */
1826                         if (!orphanage_ino)
1827                                 orphanage_ino = inum;
1828                 }
1829
1830                 /*
1831                  * check for duplicate names in directory.
1832                  */
1833                 if (!dir_hash_add(mp, hashtab, addr, inum, dep->namelen,
1834                                 dep->name, M_DIROPS(mp)->data_get_ftype(dep))) {
1835                         nbad++;
1836                         if (entry_junked(
1837         _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"),
1838                                         fname, inum, ip->i_ino)) {
1839                                 dep->name[0] = '/';
1840                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1841                         }
1842                         if (inum == orphanage_ino)
1843                                 orphanage_ino = 0;
1844                         continue;
1845                 }
1846
1847                 /*
1848                  * if just scanning to rebuild a directory due to a ".."
1849                  * update, just continue
1850                  */
1851                 if (dotdot_update)
1852                         continue;
1853
1854                 /*
1855                  * skip the '..' entry since it's checked when the
1856                  * directory is reached by something else.  if it never
1857                  * gets reached, it'll be moved to the orphanage and we'll
1858                  * take care of it then. If it doesn't exist at all, the
1859                  * directory needs to be rebuilt first before being added
1860                  * to the orphanage.
1861                  */
1862                 if (dep->namelen == 2 && dep->name[0] == '.' &&
1863                                 dep->name[1] == '.') {
1864                         if (da_bno != 0) {
1865                                 /* ".." should be in the first block */
1866                                 nbad++;
1867                                 if (entry_junked(
1868         _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is not in the the first block"), fname,
1869                                                 inum, ip->i_ino)) {
1870                                         dep->name[0] = '/';
1871                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1872                                 }
1873                         }
1874                         continue;
1875                 }
1876                 ASSERT(no_modify || !verify_inum(mp, inum));
1877                 /*
1878                  * special case the . entry.  we know there's only one
1879                  * '.' and only '.' points to itself because bogus entries
1880                  * got trashed in phase 3 if there were > 1.
1881                  * bump up link count for '.' but don't set reached
1882                  * until we're actually reached by another directory
1883                  * '..' is already accounted for or will be taken care
1884                  * of when directory is moved to orphanage.
1885                  */
1886                 if (ip->i_ino == inum)  {
1887                         ASSERT(dep->name[0] == '.' && dep->namelen == 1);
1888                         add_inode_ref(current_irec, current_ino_offset);
1889                         if (da_bno != 0 ||
1890                             dep != M_DIROPS(mp)->data_entry_p(d)) {
1891                                 /* "." should be the first entry */
1892                                 nbad++;
1893                                 if (entry_junked(
1894         _("entry \"%s\" in dir %" PRIu64 " is not the first entry"),
1895                                                 fname, inum, ip->i_ino)) {
1896                                         dep->name[0] = '/';
1897                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1898                                 }
1899                         }
1900                         *need_dot = 0;
1901                         continue;
1902                 }
1903                 /*
1904                  * skip entries with bogus inumbers if we're in no modify mode
1905                  */
1906                 if (no_modify && verify_inum(mp, inum))
1907                         continue;
1908
1909                 /* validate ftype field if supported */
1910                 if (xfs_sb_version_hasftype(&mp->m_sb)) {
1911                         uint8_t dir_ftype;
1912                         uint8_t ino_ftype;
1913
1914                         dir_ftype = M_DIROPS(mp)->data_get_ftype(dep);
1915                         ino_ftype = get_inode_ftype(irec, ino_offset);
1916
1917                         if (dir_ftype != ino_ftype) {
1918                                 if (no_modify) {
1919                                         do_warn(
1920         _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
1921                                                 dir_ftype, ino_ftype,
1922                                                 ip->i_ino, inum);
1923                                 } else {
1924                                         do_warn(
1925         _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
1926                                                 dir_ftype, ino_ftype,
1927                                                 ip->i_ino, inum);
1928                                         M_DIROPS(mp)->data_put_ftype(dep,
1929                                                                 ino_ftype);
1930                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1931                                         dir_hash_update_ftype(hashtab, addr,
1932                                                               ino_ftype);
1933                                 }
1934                         }
1935                 }
1936
1937                 /*
1938                  * check easy case first, regular inode, just bump
1939                  * the link count and continue
1940                  */
1941                 if (!inode_isadir(irec, ino_offset))  {
1942                         add_inode_reached(irec, ino_offset);
1943                         continue;
1944                 }
1945                 parent = get_inode_parent(irec, ino_offset);
1946                 ASSERT(parent != 0);
1947                 junkit = 0;
1948                 /*
1949                  * bump up the link counts in parent and child
1950                  * directory but if the link doesn't agree with
1951                  * the .. in the child, blow out the entry.
1952                  * if the directory has already been reached,
1953                  * blow away the entry also.
1954                  */
1955                 if (is_inode_reached(irec, ino_offset))  {
1956                         junkit = 1;
1957                         do_warn(
1958 _("entry \"%s\" in dir %" PRIu64" points to an already connected directory inode %" PRIu64 "\n"),
1959                                 fname, ip->i_ino, inum);
1960                 } else if (parent == ip->i_ino)  {
1961                         add_inode_reached(irec, ino_offset);
1962                         add_inode_ref(current_irec, current_ino_offset);
1963                 } else if (parent == NULLFSINO) {
1964                         /* ".." was missing, but this entry refers to it,
1965                            so, set it as the parent and mark for rebuild */
1966                         do_warn(
1967         _("entry \"%s\" in dir ino %" PRIu64 " doesn't have a .. entry, will set it in ino %" PRIu64 ".\n"),
1968                                 fname, ip->i_ino, inum);
1969                         set_inode_parent(irec, ino_offset, ip->i_ino);
1970                         add_inode_reached(irec, ino_offset);
1971                         add_inode_ref(current_irec, current_ino_offset);
1972                         add_dotdot_update(XFS_INO_TO_AGNO(mp, inum), irec,
1973                                                                 ino_offset);
1974                 } else  {
1975                         junkit = 1;
1976                         do_warn(
1977 _("entry \"%s\" in dir inode %" PRIu64 " inconsistent with .. value (%" PRIu64 ") in ino %" PRIu64 "\n"),
1978                                 fname, ip->i_ino, parent, inum);
1979                 }
1980                 if (junkit)  {
1981                         if (inum == orphanage_ino)
1982                                 orphanage_ino = 0;
1983                         nbad++;
1984                         if (!no_modify)  {
1985                                 dep->name[0] = '/';
1986                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1987                                 if (verbose)
1988                                         do_warn(
1989                                         _("\twill clear entry \"%s\"\n"),
1990                                                 fname);
1991                         } else  {
1992                                 do_warn(_("\twould clear entry \"%s\"\n"),
1993                                         fname);
1994                         }
1995                 }
1996         }
1997         *num_illegal += nbad;
1998         if (needscan)
1999                 libxfs_dir2_data_freescan_int(mp->m_dir_geo, M_DIROPS(mp),
2000                                 d, &i);
2001         if (needlog)
2002                 libxfs_dir2_data_log_header(&da, bp);
2003         libxfs_defer_ijoin(&dfops, ip);
2004         libxfs_defer_finish(&tp, &dfops);
2005         libxfs_trans_commit(tp);
2006
2007         /* record the largest free space in the freetab for later checking */
2008         bf = M_DIROPS(mp)->data_bestfree_p(d);
2009         freetab->ents[db].v = be16_to_cpu(bf[0].length);
2010         freetab->ents[db].s = 0;
2011 }
2012
2013 /* check v5 metadata */
2014 static int
2015 __check_dir3_header(
2016         struct xfs_mount        *mp,
2017         struct xfs_buf          *bp,
2018         xfs_ino_t               ino,
2019         __be64                  owner,
2020         __be64                  blkno,
2021         uuid_t                  *uuid)
2022 {
2023
2024         /* verify owner */
2025         if (be64_to_cpu(owner) != ino) {
2026                 do_warn(
2027 _("expected owner inode %" PRIu64 ", got %llu, directory block %" PRIu64 "\n"),
2028                         ino, (unsigned long long)be64_to_cpu(owner), bp->b_bn);
2029                 return 1;
2030         }
2031         /* verify block number */
2032         if (be64_to_cpu(blkno) != bp->b_bn) {
2033                 do_warn(
2034 _("expected block %" PRIu64 ", got %llu, directory inode %" PRIu64 "\n"),
2035                         bp->b_bn, (unsigned long long)be64_to_cpu(blkno), ino);
2036                 return 1;
2037         }
2038         /* verify uuid */
2039         if (platform_uuid_compare(uuid, &mp->m_sb.sb_meta_uuid) != 0) {
2040                 do_warn(
2041 _("wrong FS UUID, directory inode %" PRIu64 " block %" PRIu64 "\n"),
2042                         ino, bp->b_bn);
2043                 return 1;
2044         }
2045
2046         return 0;
2047 }
2048
2049 static int
2050 check_da3_header(
2051         struct xfs_mount        *mp,
2052         struct xfs_buf          *bp,
2053         xfs_ino_t               ino)
2054 {
2055         struct xfs_da3_blkinfo  *info = bp->b_addr;
2056
2057         return __check_dir3_header(mp, bp, ino, info->owner, info->blkno,
2058                                    &info->uuid);
2059 }
2060
2061 static int
2062 check_dir3_header(
2063         struct xfs_mount        *mp,
2064         struct xfs_buf          *bp,
2065         xfs_ino_t               ino)
2066 {
2067         struct xfs_dir3_blk_hdr *info = bp->b_addr;
2068
2069         return __check_dir3_header(mp, bp, ino, info->owner, info->blkno,
2070                                    &info->uuid);
2071 }
2072
2073 /*
2074  * Check contents of leaf-form block.
2075  */
2076 static int
2077 longform_dir2_check_leaf(
2078         xfs_mount_t             *mp,
2079         xfs_inode_t             *ip,
2080         dir_hash_tab_t          *hashtab,
2081         freetab_t               *freetab)
2082 {
2083         int                     badtail;
2084         __be16                  *bestsp;
2085         struct xfs_buf          *bp;
2086         xfs_dablk_t             da_bno;
2087         int                     i;
2088         xfs_dir2_leaf_t         *leaf;
2089         xfs_dir2_leaf_tail_t    *ltp;
2090         int                     seeval;
2091         struct xfs_dir2_leaf_entry *ents;
2092         struct xfs_dir3_icleaf_hdr leafhdr;
2093         int                     error;
2094         int                     fixit = 0;
2095
2096         da_bno = mp->m_dir_geo->leafblk;
2097         error = dir_read_buf(ip, da_bno, -1, &bp, &xfs_dir3_leaf1_buf_ops,
2098                              &fixit);
2099         if (error == EFSBADCRC || error == EFSCORRUPTED || fixit) {
2100                 do_warn(
2101         _("leaf block %u for directory inode %" PRIu64 " bad CRC\n"),
2102                         da_bno, ip->i_ino);
2103                 return 1;
2104         } else if (error) {
2105                 do_error(
2106         _("can't read block %u for directory inode %" PRIu64 ", error %d\n"),
2107                         da_bno, ip->i_ino, error);
2108                 /* NOTREACHED */
2109         }
2110
2111         leaf = bp->b_addr;
2112         M_DIROPS(mp)->leaf_hdr_from_disk(&leafhdr, leaf);
2113         ents = M_DIROPS(mp)->leaf_ents_p(leaf);
2114         ltp = xfs_dir2_leaf_tail_p(mp->m_dir_geo, leaf);
2115         bestsp = xfs_dir2_leaf_bests_p(ltp);
2116         if (!(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC ||
2117               leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) ||
2118                                 leafhdr.forw || leafhdr.back ||
2119                                 leafhdr.count < leafhdr.stale ||
2120                                 leafhdr.count >
2121                                         M_DIROPS(mp)->leaf_max_ents(mp->m_dir_geo) ||
2122                                 (char *)&ents[leafhdr.count] > (char *)bestsp) {
2123                 do_warn(
2124         _("leaf block %u for directory inode %" PRIu64 " bad header\n"),
2125                         da_bno, ip->i_ino);
2126                 libxfs_putbuf(bp);
2127                 return 1;
2128         }
2129
2130         if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) {
2131                 error = check_da3_header(mp, bp, ip->i_ino);
2132                 if (error) {
2133                         libxfs_putbuf(bp);
2134                         return error;
2135                 }
2136         }
2137
2138         seeval = dir_hash_see_all(hashtab, ents, leafhdr.count, leafhdr.stale);
2139         if (dir_hash_check(hashtab, ip, seeval)) {
2140                 libxfs_putbuf(bp);
2141                 return 1;
2142         }
2143         badtail = freetab->nents != be32_to_cpu(ltp->bestcount);
2144         for (i = 0; !badtail && i < be32_to_cpu(ltp->bestcount); i++) {
2145                 freetab->ents[i].s = 1;
2146                 badtail = freetab->ents[i].v != be16_to_cpu(bestsp[i]);
2147         }
2148         if (badtail) {
2149                 do_warn(
2150         _("leaf block %u for directory inode %" PRIu64 " bad tail\n"),
2151                         da_bno, ip->i_ino);
2152                 libxfs_putbuf(bp);
2153                 return 1;
2154         }
2155         libxfs_putbuf(bp);
2156         return fixit;
2157 }
2158
2159 /*
2160  * Check contents of the node blocks (leaves)
2161  * Looks for matching hash values for the data entries.
2162  */
2163 static int
2164 longform_dir2_check_node(
2165         xfs_mount_t             *mp,
2166         xfs_inode_t             *ip,
2167         dir_hash_tab_t          *hashtab,
2168         freetab_t               *freetab)
2169 {
2170         struct xfs_buf          *bp;
2171         xfs_dablk_t             da_bno;
2172         xfs_dir2_db_t           fdb;
2173         xfs_dir2_free_t         *free;
2174         int                     i;
2175         xfs_dir2_leaf_t         *leaf;
2176         xfs_fileoff_t           next_da_bno;
2177         int                     seeval = 0;
2178         int                     used;
2179         struct xfs_dir2_leaf_entry *ents;
2180         struct xfs_dir3_icleaf_hdr leafhdr;
2181         struct xfs_dir3_icfree_hdr freehdr;
2182         __be16                  *bests;
2183         int                     error;
2184         int                     fixit = 0;
2185
2186         for (da_bno = mp->m_dir_geo->leafblk, next_da_bno = 0;
2187                         next_da_bno != NULLFILEOFF && da_bno < mp->m_dir_geo->freeblk;
2188                         da_bno = (xfs_dablk_t)next_da_bno) {
2189                 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2190                 if (bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK))
2191                         break;
2192
2193                 /*
2194                  * we need to use the da3 node verifier here as it handles the
2195                  * fact that reading the leaf hash tree blocks can return either
2196                  * leaf or node blocks and calls the correct verifier. If we get
2197                  * a node block, then we'll skip it below based on a magic
2198                  * number check.
2199                  */
2200                 error = dir_read_buf(ip, da_bno, -1, &bp,
2201                                      &xfs_da3_node_buf_ops, &fixit);
2202                 if (error) {
2203                         do_warn(
2204         _("can't read leaf block %u for directory inode %" PRIu64 ", error %d\n"),
2205                                 da_bno, ip->i_ino, error);
2206                         return 1;
2207                 }
2208                 leaf = bp->b_addr;
2209                 M_DIROPS(mp)->leaf_hdr_from_disk(&leafhdr, leaf);
2210                 ents = M_DIROPS(mp)->leaf_ents_p(leaf);
2211                 if (!(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
2212                       leafhdr.magic == XFS_DIR3_LEAFN_MAGIC ||
2213                       leafhdr.magic == XFS_DA_NODE_MAGIC ||
2214                       leafhdr.magic == XFS_DA3_NODE_MAGIC)) {
2215                         do_warn(
2216         _("unknown magic number %#x for block %u in directory inode %" PRIu64 "\n"),
2217                                 leafhdr.magic, da_bno, ip->i_ino);
2218                         libxfs_putbuf(bp);
2219                         return 1;
2220                 }
2221
2222                 /* check v5 metadata */
2223                 if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC ||
2224                     leafhdr.magic == XFS_DA3_NODE_MAGIC) {
2225                         error = check_da3_header(mp, bp, ip->i_ino);
2226                         if (error) {
2227                                 libxfs_putbuf(bp);
2228                                 return error;
2229                         }
2230                 }
2231
2232                 /* ignore nodes */
2233                 if (leafhdr.magic == XFS_DA_NODE_MAGIC ||
2234                     leafhdr.magic == XFS_DA3_NODE_MAGIC) {
2235                         libxfs_putbuf(bp);
2236                         continue;
2237                 }
2238
2239                 /*
2240                  * If there's a validator error, we need to ensure that we got
2241                  * the right ops on the buffer for when we write it back out.
2242                  */
2243                 bp->b_ops = &xfs_dir3_leafn_buf_ops;
2244                 if (leafhdr.count > M_DIROPS(mp)->leaf_max_ents(mp->m_dir_geo) ||
2245                     leafhdr.count < leafhdr.stale) {
2246                         do_warn(
2247         _("leaf block %u for directory inode %" PRIu64 " bad header\n"),
2248                                 da_bno, ip->i_ino);
2249                         libxfs_putbuf(bp);
2250                         return 1;
2251                 }
2252                 seeval = dir_hash_see_all(hashtab, ents,
2253                                         leafhdr.count, leafhdr.stale);
2254                 libxfs_putbuf(bp);
2255                 if (seeval != DIR_HASH_CK_OK)
2256                         return 1;
2257         }
2258         if (dir_hash_check(hashtab, ip, seeval))
2259                 return 1;
2260
2261         for (da_bno = mp->m_dir_geo->freeblk, next_da_bno = 0;
2262              next_da_bno != NULLFILEOFF;
2263              da_bno = (xfs_dablk_t)next_da_bno) {
2264                 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2265                 if (bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK))
2266                         break;
2267
2268                 error = dir_read_buf(ip, da_bno, -1, &bp,
2269                                      &xfs_dir3_free_buf_ops, &fixit);
2270                 if (error) {
2271                         do_warn(
2272         _("can't read freespace block %u for directory inode %" PRIu64 ", error %d\n"),
2273                                 da_bno, ip->i_ino, error);
2274                         return 1;
2275                 }
2276                 free = bp->b_addr;
2277                 M_DIROPS(mp)->free_hdr_from_disk(&freehdr, free);
2278                 bests = M_DIROPS(mp)->free_bests_p(free);
2279                 fdb = xfs_dir2_da_to_db(mp->m_dir_geo, da_bno);
2280                 if (!(freehdr.magic == XFS_DIR2_FREE_MAGIC ||
2281                       freehdr.magic == XFS_DIR3_FREE_MAGIC) ||
2282                     freehdr.firstdb !=
2283                         (fdb - xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) *
2284                         M_DIROPS(mp)->free_max_bests(mp->m_dir_geo) ||
2285                     freehdr.nvalid < freehdr.nused) {
2286                         do_warn(
2287         _("free block %u for directory inode %" PRIu64 " bad header\n"),
2288                                 da_bno, ip->i_ino);
2289                         libxfs_putbuf(bp);
2290                         return 1;
2291                 }
2292
2293                 if (freehdr.magic == XFS_DIR3_FREE_MAGIC) {
2294                         error = check_dir3_header(mp, bp, ip->i_ino);
2295                         if (error) {
2296                                 libxfs_putbuf(bp);
2297                                 return error;
2298                         }
2299                 }
2300                 for (i = used = 0; i < freehdr.nvalid; i++) {
2301                         if (i + freehdr.firstdb >= freetab->nents ||
2302                                         freetab->ents[i + freehdr.firstdb].v !=
2303                                                 be16_to_cpu(bests[i])) {
2304                                 do_warn(
2305         _("free block %u entry %i for directory ino %" PRIu64 " bad\n"),
2306                                         da_bno, i, ip->i_ino);
2307                                 libxfs_putbuf(bp);
2308                                 return 1;
2309                         }
2310                         used += be16_to_cpu(bests[i]) != NULLDATAOFF;
2311                         freetab->ents[i + freehdr.firstdb].s = 1;
2312                 }
2313                 if (used != freehdr.nused) {
2314                         do_warn(
2315         _("free block %u for directory inode %" PRIu64 " bad nused\n"),
2316                                 da_bno, ip->i_ino);
2317                         libxfs_putbuf(bp);
2318                         return 1;
2319                 }
2320                 libxfs_putbuf(bp);
2321         }
2322         for (i = 0; i < freetab->nents; i++) {
2323                 if ((freetab->ents[i].s == 0) &&
2324                     (freetab->ents[i].v != NULLDATAOFF)) {
2325                         do_warn(
2326         _("missing freetab entry %u for directory inode %" PRIu64 "\n"),
2327                                 i, ip->i_ino);
2328                         return 1;
2329                 }
2330         }
2331         return fixit;
2332 }
2333
2334 /*
2335  * If a directory is corrupt, we need to read in as many entries as possible,
2336  * destroy the entry and create a new one with recovered name/inode pairs.
2337  * (ie. get libxfs to do all the grunt work)
2338  */
2339 static void
2340 longform_dir2_entry_check(xfs_mount_t   *mp,
2341                         xfs_ino_t       ino,
2342                         xfs_inode_t     *ip,
2343                         int             *num_illegal,
2344                         int             *need_dot,
2345                         ino_tree_node_t *irec,
2346                         int             ino_offset,
2347                         dir_hash_tab_t  *hashtab)
2348 {
2349         struct xfs_buf          **bplist;
2350         xfs_dablk_t             da_bno;
2351         freetab_t               *freetab;
2352         int                     num_bps;
2353         int                     i;
2354         int                     isblock;
2355         int                     isleaf;
2356         xfs_fileoff_t           next_da_bno;
2357         int                     seeval;
2358         int                     fixit = 0;
2359         xfs_dir2_db_t           db;
2360         struct xfs_da_args      args;
2361
2362         *need_dot = 1;
2363         freetab = malloc(FREETAB_SIZE(ip->i_d.di_size / mp->m_dir_geo->blksize));
2364         if (!freetab) {
2365                 do_error(_("malloc failed in %s (%" PRId64 " bytes)\n"),
2366                         __func__,
2367                         FREETAB_SIZE(ip->i_d.di_size / mp->m_dir_geo->blksize));
2368                 exit(1);
2369         }
2370         freetab->naents = ip->i_d.di_size / mp->m_dir_geo->blksize;
2371         freetab->nents = 0;
2372         for (i = 0; i < freetab->naents; i++) {
2373                 freetab->ents[i].v = NULLDATAOFF;
2374                 freetab->ents[i].s = 0;
2375         }
2376         num_bps = freetab->naents;
2377         bplist = calloc(num_bps, sizeof(struct xfs_buf*));
2378         if (!bplist)
2379                 do_error(_("calloc failed in %s (%zu bytes)\n"),
2380                         __func__, num_bps * sizeof(struct xfs_buf*));
2381
2382         /* is this a block, leaf, or node directory? */
2383         args.dp = ip;
2384         args.geo = mp->m_dir_geo;
2385         libxfs_dir2_isblock(&args, &isblock);
2386         libxfs_dir2_isleaf(&args, &isleaf);
2387
2388         /* check directory "data" blocks (ie. name/inode pairs) */
2389         for (da_bno = 0, next_da_bno = 0;
2390              next_da_bno != NULLFILEOFF && da_bno < mp->m_dir_geo->leafblk;
2391              da_bno = (xfs_dablk_t)next_da_bno) {
2392                 const struct xfs_buf_ops *ops;
2393                 int                      error;
2394                 struct xfs_dir2_data_hdr *d;
2395
2396                 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2397                 if (bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK)) {
2398                         /*
2399                          * if this is the first block, there isn't anything we
2400                          * can recover so we just trash it.
2401                          */
2402                          if (da_bno == 0) {
2403                                 fixit++;
2404                                 goto out_fix;
2405                         }
2406                         break;
2407                 }
2408
2409                 db = xfs_dir2_da_to_db(mp->m_dir_geo, da_bno);
2410                 if (db >= num_bps) {
2411                         /* more data blocks than expected */
2412                         num_bps = db + 1;
2413                         bplist = realloc(bplist, num_bps * sizeof(struct xfs_buf*));
2414                         if (!bplist)
2415                                 do_error(_("realloc failed in %s (%zu bytes)\n"),
2416                                         __func__,
2417                                         num_bps * sizeof(struct xfs_buf*));
2418                 }
2419
2420                 if (isblock)
2421                         ops = &xfs_dir3_block_buf_ops;
2422                 else
2423                         ops = &xfs_dir3_data_buf_ops;
2424
2425                 error = dir_read_buf(ip, da_bno, -1, &bplist[db], ops, &fixit);
2426                 if (error) {
2427                         do_warn(
2428         _("can't read data block %u for directory inode %" PRIu64 " error %d\n"),
2429                                 da_bno, ino, error);
2430                         *num_illegal += 1;
2431
2432                         /*
2433                          * we try to read all "data" blocks, but if we are in
2434                          * block form and we fail, there isn't anything else to
2435                          * read, and nothing we can do but trash it.
2436                          */
2437                         if (isblock) {
2438                                 fixit++;
2439                                 goto out_fix;
2440                         }
2441                         continue;
2442                 }
2443
2444                 /* check v5 metadata */
2445                 d = bplist[db]->b_addr;
2446                 if (be32_to_cpu(d->magic) == XFS_DIR3_BLOCK_MAGIC ||
2447                     be32_to_cpu(d->magic) == XFS_DIR3_DATA_MAGIC) {
2448                         struct xfs_buf           *bp = bplist[db];
2449
2450                         error = check_dir3_header(mp, bp, ino);
2451                         if (error) {
2452                                 fixit++;
2453                                 continue;
2454                         }
2455                 }
2456
2457                 longform_dir2_entry_check_data(mp, ip, num_illegal, need_dot,
2458                                 irec, ino_offset, &bplist[db], hashtab,
2459                                 &freetab, da_bno, isblock);
2460         }
2461         fixit |= (*num_illegal != 0) || dir2_is_badino(ino) || *need_dot;
2462
2463         if (!dotdot_update) {
2464                 /* check btree and freespace */
2465                 if (isblock) {
2466                         struct xfs_dir2_data_hdr *block;
2467                         xfs_dir2_block_tail_t   *btp;
2468                         xfs_dir2_leaf_entry_t   *blp;
2469
2470                         block = bplist[0]->b_addr;
2471                         btp = xfs_dir2_block_tail_p(mp->m_dir_geo, block);
2472                         blp = xfs_dir2_block_leaf_p(btp);
2473                         seeval = dir_hash_see_all(hashtab, blp,
2474                                                 be32_to_cpu(btp->count),
2475                                                 be32_to_cpu(btp->stale));
2476                         if (dir_hash_check(hashtab, ip, seeval))
2477                                 fixit |= 1;
2478                 } else if (isleaf) {
2479                         fixit |= longform_dir2_check_leaf(mp, ip, hashtab,
2480                                                                 freetab);
2481                 } else {
2482                         fixit |= longform_dir2_check_node(mp, ip, hashtab,
2483                                                                 freetab);
2484                 }
2485         }
2486 out_fix:
2487         if (!no_modify && (fixit || dotdot_update)) {
2488                 dir_hash_dup_names(hashtab);
2489                 for (i = 0; i < num_bps; i++)
2490                         if (bplist[i])
2491                                 libxfs_putbuf(bplist[i]);
2492                 longform_dir2_rebuild(mp, ino, ip, irec, ino_offset, hashtab);
2493                 *num_illegal = 0;
2494                 *need_dot = 0;
2495         } else {
2496                 for (i = 0; i < num_bps; i++)
2497                         if (bplist[i])
2498                                 libxfs_putbuf(bplist[i]);
2499         }
2500
2501         free(bplist);
2502         free(freetab);
2503 }
2504
2505 /*
2506  * shortform directory v2 processing routines -- entry verification and
2507  * bad entry deletion (pruning).
2508  */
2509 static struct xfs_dir2_sf_entry *
2510 shortform_dir2_junk(
2511         struct xfs_mount        *mp,
2512         struct xfs_dir2_sf_hdr  *sfp,
2513         struct xfs_dir2_sf_entry *sfep,
2514         xfs_ino_t               lino,
2515         int                     *max_size,
2516         int                     *index,
2517         int                     *bytes_deleted,
2518         int                     *ino_dirty)
2519 {
2520         struct xfs_dir2_sf_entry *next_sfep;
2521         int                     next_len;
2522         int                     next_elen;
2523
2524         if (lino == orphanage_ino)
2525                 orphanage_ino = 0;
2526
2527         next_elen = M_DIROPS(mp)->sf_entsize(sfp, sfep->namelen);
2528         next_sfep = M_DIROPS(mp)->sf_nextentry(sfp, sfep);
2529
2530         /*
2531          * if we are just checking, simply return the pointer to the next entry
2532          * here so that the checking loop can continue.
2533          */
2534         if (no_modify) {
2535                 do_warn(_("would junk entry\n"));
2536                 return next_sfep;
2537         }
2538
2539         /*
2540          * now move all the remaining entries down over the junked entry and
2541          * clear the newly unused bytes at the tail of the directory region.
2542          */
2543         next_len = *max_size - ((intptr_t)next_sfep - (intptr_t)sfp);
2544         *max_size -= next_elen;
2545         *bytes_deleted += next_elen;
2546
2547         memmove(sfep, next_sfep, next_len);
2548         memset((void *)((intptr_t)sfep + next_len), 0, next_elen);
2549         sfp->count -= 1;
2550         *ino_dirty = 1;
2551
2552         /*
2553          * WARNING:  drop the index i by one so it matches the decremented count
2554          * for accurate comparisons in the loop test
2555          */
2556         (*index)--;
2557
2558         if (verbose)
2559                 do_warn(_("junking entry\n"));
2560         else
2561                 do_warn("\n");
2562         return sfep;
2563 }
2564
/*
 * Check the entries of a shortform directory, i.e. one whose entries live
 * in the inode's data fork (ip->i_df).
 *
 * When dotdot_update is set, only the parent pointer is rewritten (or, in
 * no_modify mode, reported) from the parent recorded for current_irec, and
 * the function returns without looking at the entries.
 *
 * Otherwise every entry is looked up in the incore inode records.  Entries
 * that reference a missing or free inode, a lost+found in the root that is
 * not a directory, a duplicate name, an already connected directory, or a
 * directory whose recorded parent disagrees are handed to
 * shortform_dir2_junk().  For surviving entries the reached/reference
 * counts are updated, the ftype is checked on filesystems with the ftype
 * feature, and large inode numbers are counted.  Finally the i8count and
 * the inode size are reconciled with what was found.
 *
 * ino                                - inode number of the directory
 * ip                                 - in-core inode of the directory
 * ino_dirty                          - cleared on entry; set to 1 by every
 *                                      path that changes the directory or
 *                                      the inode size
 * current_irec, current_ino_offset   - incore record (and offset in it) of
 *                                      the directory itself
 * hashtab                            - name hash table used for duplicate
 *                                      detection
 */
static void
shortform_dir2_entry_check(xfs_mount_t  *mp,
                        xfs_ino_t       ino,
                        xfs_inode_t     *ip,
                        int             *ino_dirty,
                        ino_tree_node_t *current_irec,
                        int             current_ino_offset,
                        dir_hash_tab_t  *hashtab)
{
        xfs_ino_t               lino;
        xfs_ino_t               parent;
        struct xfs_dir2_sf_hdr  *sfp;
        struct xfs_dir2_sf_entry *sfep;
        struct xfs_dir2_sf_entry *next_sfep;
        struct xfs_ifork        *ifp;
        struct ino_tree_node    *irec;
        int                     max_size;
        int                     ino_offset;
        int                     i;
        int                     bad_sfnamelen;
        int                     namelen;
        int                     bytes_deleted;
        char                    fname[MAXNAMELEN + 1];
        int                     i8;

        ifp = &ip->i_df;
        sfp = (struct xfs_dir2_sf_hdr *) ifp->if_u1.if_data;
        *ino_dirty = 0;
        bytes_deleted = 0;

        /*
         * max_size is the number of fork bytes still in use; it is passed
         * to shortform_dir2_junk(), which shrinks it for each junked entry.
         */
        max_size = ifp->if_bytes;
        ASSERT(ip->i_d.di_size <= ifp->if_bytes);

        /*
         * if we are only here because of a pending ".." update, set the
         * parent (or report that we would) and return
         */
        if (dotdot_update) {
                parent = get_inode_parent(current_irec, current_ino_offset);
                if (no_modify) {
                        do_warn(
        _("would set .. in sf dir inode %" PRIu64 " to %" PRIu64 "\n"),
                                ino, parent);
                } else {
                        do_warn(
        _("setting .. in sf dir inode %" PRIu64 " to %" PRIu64 "\n"),
                                ino, parent);
                        M_DIROPS(mp)->sf_put_parent_ino(sfp, parent);
                        *ino_dirty = 1;
                }
                return;
        }

        /*
         * no '.' entry in shortform dirs, just bump up ref count by 1
         * '..' was already (or will be) accounted for and checked when
         * the directory is reached or will be taken care of when the
         * directory is moved to orphanage.
         */
        add_inode_ref(current_irec, current_ino_offset);

        /*
         * Initialise i8 counter -- the parent inode number counts as well.
         */
        i8 = M_DIROPS(mp)->sf_get_parent_ino(sfp) > XFS_DIR2_MAX_SHORT_INUM;

        /*
         * now run through entries, stop at first bad entry, don't need
         * to skip over '..' since that's encoded in its own field and
         * no need to worry about '.' since it doesn't exist.
         */
        sfep = next_sfep = xfs_dir2_sf_firstentry(sfp);

        /*
         * The loop ends after sfp->count entries or as soon as the next
         * entry would start at or beyond max_size.  Junking an entry
         * decrements i and sfp->count and leaves next_sfep on the same
         * address, which then holds the following entry.
         */
        for (i = 0; i < sfp->count && max_size >
                                        (intptr_t)next_sfep - (intptr_t)sfp;
                        sfep = next_sfep, i++)  {
                bad_sfnamelen = 0;

                lino = M_DIROPS(mp)->sf_get_ino(sfp, sfep);

                namelen = sfep->namelen;

                ASSERT(no_modify || namelen > 0);

                if (no_modify && namelen == 0)  {
                        /*
                         * if we're really lucky, this is
                         * the last entry in which case we
                         * can use the dir size to set the
                         * namelen value.  otherwise, forget
                         * it because we're not going to be
                         * able to find the next entry.
                         */
                        bad_sfnamelen = 1;

                        if (i == sfp->count - 1)  {
                                namelen = ip->i_d.di_size -
                                        ((intptr_t) &sfep->name[0] -
                                         (intptr_t) sfp);
                        } else  {
                                /*
                                 * don't process the rest of the directory,
                                 * break out of processing loop
                                 */
                                break;
                        }
                /*
                 * same recovery when the entry, sized by its recorded
                 * namelen, would run past the directory size (the doubled
                 * '+' below is a binary plus followed by a unary plus)
                 */
                } else if (no_modify && (intptr_t) sfep - (intptr_t) sfp +
                                + M_DIROPS(mp)->sf_entsize(sfp, sfep->namelen)
                                > ip->i_d.di_size)  {
                        bad_sfnamelen = 1;

                        if (i == sfp->count - 1)  {
                                namelen = ip->i_d.di_size -
                                        ((intptr_t) &sfep->name[0] -
                                         (intptr_t) sfp);
                        } else  {
                                /*
                                 * don't process the rest of the directory,
                                 * break out of processing loop
                                 */
                                break;
                        }
                }

                /*
                 * NUL-terminated copy of the name for the messages and the
                 * lost+found comparison below; note that the length comes
                 * from the entry itself, not from the recovered namelen
                 */
                memmove(fname, sfep->name, sfep->namelen);
                fname[sfep->namelen] = '\0';

                ASSERT(no_modify || (lino != NULLFSINO && lino != 0));
                ASSERT(no_modify || !verify_inum(mp, lino));

                /*
                 * Also skip entries with bogus inode numbers if we're
                 * in no modify mode.
                 */

                if (no_modify && verify_inum(mp, lino))  {
                        next_sfep = M_DIROPS(mp)->sf_nextentry(sfp, sfep);
                        continue;
                }

                irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, lino),
                                        XFS_INO_TO_AGINO(mp, lino));

                if (irec == NULL)  {
                        do_warn(
        _("entry \"%s\" in shortform directory %" PRIu64 " references non-existent inode %" PRIu64 "\n"),
                                fname, ino, lino);
                        next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
                                                &max_size, &i, &bytes_deleted,
                                                ino_dirty);
                        continue;
                }

                ino_offset = XFS_INO_TO_AGINO(mp, lino) - irec->ino_startnum;

                /*
                 * if it's a free inode, blow out the entry.
                 * by now, any inode that we think is free
                 * really is free.
                 */
                if (is_inode_free(irec, ino_offset))  {
                        do_warn(
        _("entry \"%s\" in shortform directory inode %" PRIu64 " points to free inode %" PRIu64 "\n"),
                                fname, ino, lino);
                        next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
                                                &max_size, &i, &bytes_deleted,
                                                ino_dirty);
                        continue;
                }
                /*
                 * check if this inode is lost+found dir in the root
                 */
                if (ino == mp->m_sb.sb_rootino && strcmp(fname, ORPHANAGE) == 0) {
                        /*
                         * if it's not a directory, trash it
                         */
                        if (!inode_isadir(irec, ino_offset)) {
                                do_warn(
        _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"),
                                        ORPHANAGE, lino, ino);
                                next_sfep = shortform_dir2_junk(mp, sfp, sfep,
                                                lino, &max_size, &i,
                                                &bytes_deleted, ino_dirty);
                                continue;
                        }
                        /*
                         * if this is a dup, it will be picked up below,
                         * otherwise, mark it as the orphanage for later.
                         */
                        if (!orphanage_ino)
                                orphanage_ino = lino;
                }
                /*
                 * check for duplicate names in directory.
                 */
                if (!dir_hash_add(mp, hashtab, (xfs_dir2_dataptr_t)
                                (sfep - xfs_dir2_sf_firstentry(sfp)),
                                lino, sfep->namelen, sfep->name,
                                M_DIROPS(mp)->sf_get_ftype(sfep))) {
                        do_warn(
_("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"),
                                fname, lino, ino);
                        next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
                                                &max_size, &i, &bytes_deleted,
                                                ino_dirty);
                        continue;
                }

                if (!inode_isadir(irec, ino_offset))  {
                        /*
                         * check easy case first, regular inode, just bump
                         * the link count
                         */
                        add_inode_reached(irec, ino_offset);
                } else  {
                        parent = get_inode_parent(irec, ino_offset);

                        /*
                         * bump up the link counts in parent and child.
                         * directory but if the link doesn't agree with
                         * the .. in the child, blow out the entry
                         */
                        if (is_inode_reached(irec, ino_offset))  {
                                do_warn(
        _("entry \"%s\" in directory inode %" PRIu64
          " references already connected inode %" PRIu64 ".\n"),
                                        fname, ino, lino);
                                next_sfep = shortform_dir2_junk(mp, sfp, sfep,
                                                lino, &max_size, &i,
                                                &bytes_deleted, ino_dirty);
                                continue;
                        } else if (parent == ino)  {
                                add_inode_reached(irec, ino_offset);
                                add_inode_ref(current_irec, current_ino_offset);
                        } else if (parent == NULLFSINO) {
                                /*
                                 * the child has no recorded parent but this
                                 * entry refers to it, so record this
                                 * directory as its parent and queue the
                                 * child for a ".." update
                                 */
                                do_warn(
        _("entry \"%s\" in dir ino %" PRIu64 " doesn't have a .. entry, will set it in ino %" PRIu64 ".\n"),
                                        fname, ino, lino);
                                set_inode_parent(irec, ino_offset, ino);
                                add_inode_reached(irec, ino_offset);
                                add_inode_ref(current_irec, current_ino_offset);
                                add_dotdot_update(XFS_INO_TO_AGNO(mp, lino),
                                                        irec, ino_offset);
                        } else  {
                                do_warn(
        _("entry \"%s\" in directory inode %" PRIu64
          " not consistent with .. value (%" PRIu64
          ") in inode %" PRIu64 ",\n"),
                                        fname, ino, parent, lino);
                                next_sfep = shortform_dir2_junk(mp, sfp, sfep,
                                                lino, &max_size, &i,
                                                &bytes_deleted, ino_dirty);
                                continue;
                        }
                }

                /* validate ftype field if supported */
                if (xfs_sb_version_hasftype(&mp->m_sb)) {
                        uint8_t dir_ftype;
                        uint8_t ino_ftype;

                        dir_ftype = M_DIROPS(mp)->sf_get_ftype(sfep);
                        ino_ftype = get_inode_ftype(irec, ino_offset);

                        if (dir_ftype != ino_ftype) {
                                if (no_modify) {
                                        do_warn(
        _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
                                                dir_ftype, ino_ftype,
                                                ino, lino);
                                } else {
                                        do_warn(
        _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
                                                dir_ftype, ino_ftype,
                                                ino, lino);
                                        /*
                                         * fix the entry and the copy held
                                         * in the hash table, using the same
                                         * key that dir_hash_add() was given
                                         */
                                        M_DIROPS(mp)->sf_put_ftype(sfep,
                                                                ino_ftype);
                                        dir_hash_update_ftype(hashtab,
                        (xfs_dir2_dataptr_t)(sfep - xfs_dir2_sf_firstentry(sfp)),
                                                              ino_ftype);
                                        *ino_dirty = 1;
                                }
                        }
                }

                /* entries kept with a large inode number count for i8count */
                if (lino > XFS_DIR2_MAX_SHORT_INUM)
                        i8++;

                /*
                 * go onto next entry - we have to take entries with bad namelen
                 * into account in no modify mode since we calculate size based
                 * on next_sfep.
                 */
                ASSERT(no_modify || bad_sfnamelen == 0);
                next_sfep = (struct xfs_dir2_sf_entry *)((intptr_t)sfep +
                              (bad_sfnamelen
                                ? M_DIROPS(mp)->sf_entsize(sfp, namelen)
                                : M_DIROPS(mp)->sf_entsize(sfp, sfep->namelen)));
        }

        /*
         * the header's i8count disagrees with the large inode numbers
         * counted above (parent included)
         */
        if (sfp->i8count != i8) {
                if (no_modify) {
                        do_warn(_("would fix i8count in inode %" PRIu64 "\n"),
                                ino);
                } else {
                        if (i8 == 0) {
                                struct xfs_dir2_sf_entry *tmp_sfep;

                                /*
                                 * process_sf_dir2_fixi8() may move the end
                                 * pointer; the distance it moved is added
                                 * to the bytes to trim from the fork
                                 */
                                tmp_sfep = next_sfep;
                                process_sf_dir2_fixi8(mp, sfp, &tmp_sfep);
                                bytes_deleted +=
                                        (intptr_t)next_sfep -
                                        (intptr_t)tmp_sfep;
                                next_sfep = tmp_sfep;
                        } else
                                sfp->i8count = i8;
                        *ino_dirty = 1;
                        do_warn(_("fixing i8count in inode %" PRIu64 "\n"),
                                ino);
                }
        }

        /*
         * sync up sizes if required
         */
        if (*ino_dirty && bytes_deleted > 0)  {
                ASSERT(!no_modify);
                libxfs_idata_realloc(ip, -bytes_deleted, XFS_DATA_FORK);
                ip->i_d.di_size -= bytes_deleted;
        }

        /*
         * if the inode size still differs from the fork size, set it to
         * the end of the last entry walked (next_sfep)
         */
        if (ip->i_d.di_size != ip->i_df.if_bytes)  {
                ASSERT(ip->i_df.if_bytes == (xfs_fsize_t)
                                ((intptr_t) next_sfep - (intptr_t) sfp));
                ip->i_d.di_size = (xfs_fsize_t)
                                ((intptr_t) next_sfep - (intptr_t) sfp);
                do_warn(
        _("setting size to %" PRId64 " bytes to reflect junked entries\n"),
                        ip->i_d.di_size);
                *ino_dirty = 1;
        }
}
2908
2909 /*
2910  * processes all reachable inodes in directories
2911  */
2912 static void
2913 process_dir_inode(
2914         xfs_mount_t             *mp,
2915         xfs_agnumber_t          agno,
2916         ino_tree_node_t         *irec,
2917         int                     ino_offset)
2918 {
2919         xfs_ino_t               ino;
2920         struct xfs_defer_ops            dfops;
2921         xfs_fsblock_t           first;
2922         xfs_inode_t             *ip;
2923         xfs_trans_t             *tp;
2924         dir_hash_tab_t          *hashtab;
2925         int                     need_dot;
2926         int                     dirty, num_illegal, error, nres;
2927
2928         ino = XFS_AGINO_TO_INO(mp, agno, irec->ino_startnum + ino_offset);
2929
2930         /*
2931          * open up directory inode, check all entries,
2932          * then call prune_dir_entries to remove all
2933          * remaining illegal directory entries.
2934          */
2935
2936         ASSERT(!is_inode_refchecked(irec, ino_offset) || dotdot_update);
2937
2938         error = -libxfs_iget(mp, NULL, ino, 0, &ip, &phase6_ifork_ops);
2939         if (error) {
2940                 if (!no_modify)
2941                         do_error(
2942         _("couldn't map inode %" PRIu64 ", err = %d\n"),
2943                                 ino, error);
2944                 else  {
2945                         do_warn(
2946         _("couldn't map inode %" PRIu64 ", err = %d\n"),
2947                                 ino, error);
2948                         /*
2949                          * see below for what we're doing if this
2950                          * is root.  Why do we need to do this here?
2951                          * to ensure that the root doesn't show up
2952                          * as being disconnected in the no_modify case.
2953                          */
2954                         if (mp->m_sb.sb_rootino == ino)  {
2955                                 add_inode_reached(irec, 0);
2956                                 add_inode_ref(irec, 0);
2957                         }
2958                 }
2959
2960                 add_inode_refchecked(irec, 0);
2961                 return;
2962         }
2963
2964         need_dot = dirty = num_illegal = 0;
2965
2966         if (mp->m_sb.sb_rootino == ino)  {
2967                 /*
2968                  * mark root inode reached and bump up
2969                  * link count for root inode to account
2970                  * for '..' entry since the root inode is
2971                  * never reached by a parent.  we know
2972                  * that root's '..' is always good --
2973                  * guaranteed by phase 3 and/or below.
2974                  */
2975                 add_inode_reached(irec, ino_offset);
2976         }
2977
2978         add_inode_refchecked(irec, ino_offset);
2979
2980         hashtab = dir_hash_init(ip->i_d.di_size);
2981
2982         /*
2983          * look for bogus entries
2984          */
2985         switch (ip->i_d.di_format)  {
2986                 case XFS_DINODE_FMT_EXTENTS:
2987                 case XFS_DINODE_FMT_BTREE:
2988                         /*
2989                          * also check for missing '.' in longform dirs.
2990                          * missing .. entries are added if required when
2991                          * the directory is connected to lost+found. but
2992                          * we need to create '.' entries here.
2993                          */
2994                         longform_dir2_entry_check(mp, ino, ip,
2995                                                 &num_illegal, &need_dot,
2996                                                 irec, ino_offset,
2997                                                 hashtab);
2998                         break;
2999
3000                 case XFS_DINODE_FMT_LOCAL:
3001                         /*
3002                          * using the remove reservation is overkill
3003                          * since at most we'll only need to log the
3004                          * inode but it's easier than wedging a
3005                          * new define in ourselves.
3006                          */
3007                         nres = no_modify ? 0 : XFS_REMOVE_SPACE_RES(mp);
3008                         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove,
3009                                                     nres, 0, 0, &tp);
3010                         if (error)
3011                                 res_failed(error);
3012
3013                         libxfs_trans_ijoin(tp, ip, 0);
3014
3015                         shortform_dir2_entry_check(mp, ino, ip, &dirty,
3016                                                 irec, ino_offset,
3017                                                 hashtab);
3018
3019                         ASSERT(dirty == 0 || (dirty && !no_modify));
3020                         if (dirty)  {
3021                                 libxfs_trans_log_inode(tp, ip,
3022                                         XFS_ILOG_CORE | XFS_ILOG_DDATA);
3023                                 libxfs_trans_commit(tp);
3024                         } else  {
3025                                 libxfs_trans_cancel(tp);
3026                         }
3027                         break;
3028
3029                 default:
3030                         break;
3031         }
3032         dir_hash_done(hashtab);
3033
3034         /*
3035          * if we have to create a .. for /, do it now *before*
3036          * we delete the bogus entries, otherwise the directory
3037          * could transform into a shortform dir which would
3038          * probably cause the simulation to choke.  Even
3039          * if the illegal entries get shifted around, it's ok
3040          * because the entries are structurally intact and in
3041          * in hash-value order so the simulation won't get confused
3042          * if it has to move them around.
3043          */
3044         if (!no_modify && need_root_dotdot && ino == mp->m_sb.sb_rootino)  {
3045                 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_LOCAL);
3046
3047                 do_warn(_("recreating root directory .. entry\n"));
3048
3049                 nres = XFS_MKDIR_SPACE_RES(mp, 2);
3050                 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir,
3051                                             nres, 0, 0, &tp);
3052                 if (error)
3053                         res_failed(error);
3054
3055                 libxfs_trans_ijoin(tp, ip, 0);
3056
3057                 libxfs_defer_init(&dfops, &first);
3058
3059                 error = -libxfs_dir_createname(tp, ip, &xfs_name_dotdot,
3060                                         ip->i_ino, &first, &dfops, nres);
3061                 if (error)
3062                         do_error(
3063         _("can't make \"..\" entry in root inode %" PRIu64 ", createname error %d\n"), ino, error);
3064
3065                 libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
3066
3067                 libxfs_defer_ijoin(&dfops, ip);
3068                 error = -libxfs_defer_finish(&tp, &dfops);
3069                 ASSERT(error == 0);
3070                 libxfs_trans_commit(tp);
3071
3072                 need_root_dotdot = 0;
3073         } else if (need_root_dotdot && ino == mp->m_sb.sb_rootino)  {
3074                 do_warn(_("would recreate root directory .. entry\n"));
3075         }
3076
3077         /*
3078          * if we need to create the '.' entry, do so only if
3079          * the directory is a longform dir.  if it's been
3080          * turned into a shortform dir, then the inode is ok
3081          * since shortform dirs have no '.' entry and the inode
3082          * has already been committed by prune_lf_dir_entry().
3083          */
3084         if (need_dot)  {
3085                 /*
3086                  * bump up our link count but don't
3087                  * bump up the inode link count.  chances
3088                  * are good that even though we lost '.'
3089                  * the inode link counts reflect '.' so
3090                  * leave the inode link count alone and if
3091                  * it turns out to be wrong, we'll catch
3092                  * that in phase 7.
3093                  */
3094                 add_inode_ref(irec, ino_offset);
3095
3096                 if (no_modify)  {
3097                         do_warn(
3098         _("would create missing \".\" entry in dir ino %" PRIu64 "\n"),
3099                                 ino);
3100                 } else if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)  {
3101                         /*
3102                          * need to create . entry in longform dir.
3103                          */
3104                         do_warn(
3105         _("creating missing \".\" entry in dir ino %" PRIu64 "\n"), ino);
3106
3107                         nres = XFS_MKDIR_SPACE_RES(mp, 1);
3108                         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir,
3109                                                     nres, 0, 0, &tp);
3110                         if (error)
3111                                 res_failed(error);
3112
3113                         libxfs_trans_ijoin(tp, ip, 0);
3114
3115                         libxfs_defer_init(&dfops, &first);
3116
3117                         error = -libxfs_dir_createname(tp, ip, &xfs_name_dot,
3118                                         ip->i_ino, &first, &dfops, nres);
3119                         if (error)
3120                                 do_error(
3121         _("can't make \".\" entry in dir ino %" PRIu64 ", createname error %d\n"),
3122                                         ino, error);
3123
3124                         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
3125
3126                         libxfs_defer_ijoin(&dfops, ip);
3127                         error = -libxfs_defer_finish(&tp, &dfops);
3128                         ASSERT(error == 0);
3129                         libxfs_trans_commit(tp);
3130                 }
3131         }
3132         IRELE(ip);
3133 }
3134
3135 /*
3136  * mark realtime bitmap and summary inodes as reached.
3137  * quota inode will be marked here as well
3138  */
3139 static void
3140 mark_standalone_inodes(xfs_mount_t *mp)
3141 {
3142         ino_tree_node_t         *irec;
3143         int                     offset;
3144
3145         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rbmino),
3146                         XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rbmino));
3147
3148         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rbmino) -
3149                         irec->ino_startnum;
3150
3151         add_inode_reached(irec, offset);
3152
3153         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rsumino),
3154                         XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rsumino));
3155
3156         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rsumino) -
3157                         irec->ino_startnum;
3158
3159         add_inode_reached(irec, offset);
3160
3161         if (fs_quotas)  {
3162                 if (mp->m_sb.sb_uquotino
3163                                 && mp->m_sb.sb_uquotino != NULLFSINO)  {
3164                         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
3165                                                 mp->m_sb.sb_uquotino),
3166                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino));
3167                         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino)
3168                                         - irec->ino_startnum;
3169                         add_inode_reached(irec, offset);
3170                 }
3171                 if (mp->m_sb.sb_gquotino
3172                                 && mp->m_sb.sb_gquotino != NULLFSINO)  {
3173                         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
3174                                                 mp->m_sb.sb_gquotino),
3175                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_gquotino));
3176                         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_gquotino)
3177                                         - irec->ino_startnum;
3178                         add_inode_reached(irec, offset);
3179                 }
3180                 if (mp->m_sb.sb_pquotino
3181                                 && mp->m_sb.sb_pquotino != NULLFSINO)  {
3182                         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
3183                                                 mp->m_sb.sb_pquotino),
3184                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino));
3185                         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino)
3186                                         - irec->ino_startnum;
3187                         add_inode_reached(irec, offset);
3188                 }
3189         }
3190 }
3191
3192 static void
3193 check_for_orphaned_inodes(
3194         xfs_mount_t             *mp,
3195         xfs_agnumber_t          agno,
3196         ino_tree_node_t         *irec)
3197 {
3198         int                     i;
3199         xfs_ino_t               ino;
3200
3201         for (i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
3202                 ASSERT(is_inode_confirmed(irec, i));
3203                 if (is_inode_free(irec, i))
3204                         continue;
3205
3206                 if (is_inode_reached(irec, i))
3207                         continue;
3208
3209                 ASSERT(inode_isadir(irec, i) ||
3210                         num_inode_references(irec, i) == 0);
3211
3212                 ino = XFS_AGINO_TO_INO(mp, agno, i + irec->ino_startnum);
3213                 if (inode_isadir(irec, i))
3214                         do_warn(_("disconnected dir inode %" PRIu64 ", "), ino);
3215                 else
3216                         do_warn(_("disconnected inode %" PRIu64 ", "), ino);
3217                 if (!no_modify)  {
3218                         if (!orphanage_ino)
3219                                 orphanage_ino = mk_orphanage(mp);
3220                         do_warn(_("moving to %s\n"), ORPHANAGE);
3221                         mv_orphanage(mp, ino, inode_isadir(irec, i));
3222                 } else  {
3223                         do_warn(_("would move to %s\n"), ORPHANAGE);
3224                 }
3225                 /*
3226                  * for read-only case, even though the inode isn't
3227                  * really reachable, set the flag (and bump our link
3228                  * count) anyway to fool phase 7
3229                  */
3230                 add_inode_reached(irec, i);
3231         }
3232 }
3233
3234 static void
3235 traverse_function(
3236         struct workqueue        *wq,
3237         xfs_agnumber_t          agno,
3238         void                    *arg)
3239 {
3240         ino_tree_node_t         *irec;
3241         int                     i;
3242         prefetch_args_t         *pf_args = arg;
3243
3244         wait_for_inode_prefetch(pf_args);
3245
3246         if (verbose)
3247                 do_log(_("        - agno = %d\n"), agno);
3248
3249         for (irec = findfirst_inode_rec(agno); irec; irec = next_ino_rec(irec)) {
3250                 if (irec->ino_isa_dir == 0)
3251                         continue;
3252
3253                 if (pf_args) {
3254                         sem_post(&pf_args->ra_count);
3255 #ifdef XR_PF_TRACE
3256                         sem_getvalue(&pf_args->ra_count, &i);
3257                         pftrace(
3258                 "processing inode chunk %p in AG %d (sem count = %d)",
3259                                 irec, agno, i);
3260 #endif
3261                 }
3262
3263                 for (i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
3264                         if (inode_isadir(irec, i))
3265                                 process_dir_inode(wq->wq_ctx, agno, irec, i);
3266                 }
3267         }
3268         cleanup_inode_prefetch(pf_args);
3269 }
3270
3271 static void
3272 update_missing_dotdot_entries(
3273         xfs_mount_t             *mp)
3274 {
3275         dotdot_update_t         *dir;
3276
3277         /*
3278          * these entries parents were updated, rebuild them again
3279          * set dotdot_update flag so processing routines do not count links
3280          */
3281         dotdot_update = 1;
3282         while (!list_empty(&dotdot_update_list)) {
3283                 dir = list_entry(dotdot_update_list.prev, struct dotdot_update,
3284                                  list);
3285                 list_del(&dir->list);
3286                 process_dir_inode(mp, dir->agno, dir->irec, dir->ino_offset);
3287                 free(dir);
3288         }
3289 }
3290
3291 static void
3292 traverse_ags(
3293         struct xfs_mount        *mp)
3294 {
3295         do_inode_prefetch(mp, 0, traverse_function, false, true);
3296 }
3297
3298 void
3299 phase6(xfs_mount_t *mp)
3300 {
3301         ino_tree_node_t         *irec;
3302         int                     i;
3303
3304         memset(&zerocr, 0, sizeof(struct cred));
3305         memset(&zerofsx, 0, sizeof(struct fsxattr));
3306         orphanage_ino = 0;
3307
3308         do_log(_("Phase 6 - check inode connectivity...\n"));
3309
3310         incore_ext_teardown(mp);
3311
3312         add_ino_ex_data(mp);
3313
3314         /*
3315          * verify existence of root directory - if we have to
3316          * make one, it's ok for the incore data structs not to
3317          * know about it since everything about it (and the other
3318          * inodes in its chunk if a new chunk was created) are ok
3319          */
3320         if (need_root_inode)  {
3321                 if (!no_modify)  {
3322                         do_warn(_("reinitializing root directory\n"));
3323                         mk_root_dir(mp);
3324                         need_root_inode = 0;
3325                         need_root_dotdot = 0;
3326                 } else  {
3327                         do_warn(_("would reinitialize root directory\n"));
3328                 }
3329         }
3330
3331         if (need_rbmino)  {
3332                 if (!no_modify)  {
3333                         do_warn(_("reinitializing realtime bitmap inode\n"));
3334                         mk_rbmino(mp);
3335                         need_rbmino = 0;
3336                 } else  {
3337                         do_warn(_("would reinitialize realtime bitmap inode\n"));
3338                 }
3339         }
3340
3341         if (need_rsumino)  {
3342                 if (!no_modify)  {
3343                         do_warn(_("reinitializing realtime summary inode\n"));
3344                         mk_rsumino(mp);
3345                         need_rsumino = 0;
3346                 } else  {
3347                         do_warn(_("would reinitialize realtime summary inode\n"));
3348                 }
3349         }
3350
3351         if (!no_modify)  {
3352                 do_log(
3353 _("        - resetting contents of realtime bitmap and summary inodes\n"));
3354                 if (fill_rbmino(mp))  {
3355                         do_warn(
3356                         _("Warning:  realtime bitmap may be inconsistent\n"));
3357                 }
3358
3359                 if (fill_rsumino(mp))  {
3360                         do_warn(
3361                         _("Warning:  realtime bitmap may be inconsistent\n"));
3362                 }
3363         }
3364
3365         mark_standalone_inodes(mp);
3366
3367         do_log(_("        - traversing filesystem ...\n"));
3368
3369         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
3370                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
3371
3372         /*
3373          * we always have a root inode, even if it's free...
3374          * if the root is free, forget it, lost+found is already gone
3375          */
3376         if (is_inode_free(irec, 0) || !inode_isadir(irec, 0))  {
3377                 need_root_inode = 1;
3378         }
3379
3380         /*
3381          * then process all inodes by walking incore inode tree
3382          */
3383         traverse_ags(mp);
3384
3385         /*
3386          * any directories that had updated ".." entries, rebuild them now
3387          */
3388         update_missing_dotdot_entries(mp);
3389
3390         do_log(_("        - traversal finished ...\n"));
3391         do_log(_("        - moving disconnected inodes to %s ...\n"),
3392                 ORPHANAGE);
3393
3394         /*
3395          * move all disconnected inodes to the orphanage
3396          */
3397         for (i = 0; i < glob_agcount; i++)  {
3398                 irec = findfirst_inode_rec(i);
3399                 while (irec != NULL)  {
3400                         check_for_orphaned_inodes(mp, i, irec);
3401                         irec = next_ino_rec(irec);
3402                 }
3403         }
3404 }