repair/phase6.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
   4  * All Rights Reserved.
   5  */
   6
   7 #include "libxfs.h"
   8 #include "threads.h"
   9 #include "prefetch.h"
  10 #include "avl.h"
  11 #include "globals.h"
  12 #include "agheader.h"
  13 #include "incore.h"
  14 #include "dir2.h"
  15 #include "protos.h"
  16 #include "err_protos.h"
  17 #include "dinode.h"
  18 #include "progress.h"
  19 #include "versions.h"
  20
/*
 * File-scope state for phase 6.
 *
 * zerocr and zerofsx are never assigned in this part of the file, so as
 * statics they start out all-zero; presumably they are passed as "empty"
 * credentials / fsxattr arguments when inodes are created -- confirm
 * against their users further down.
 */
static struct cred              zerocr;
static struct fsxattr           zerofsx;
/* NOTE(review): looks like the inode number of the orphanage directory. */
static xfs_ino_t                orphanage_ino;

/* Name descriptor for the "." entry: one byte long, directory file type. */
static struct xfs_name          xfs_name_dot = {(unsigned char *)".",
                                                1,
                                                XFS_DIR3_FT_DIR};
  28
  29 /*
  30  * When we're checking directory inodes, we're allowed to set a directory's
  31  * dotdot entry to zero to signal that the parent needs to be reconnected
  32  * during phase 6.  If we're handling a shortform directory the ifork
  33  * verifiers will fail, so temporarily patch out this canary so that we can
  34  * verify the rest of the fork and move on to fixing the dir.
  35  */
  36 static xfs_failaddr_t
  37 phase6_verify_dir(
  38         struct xfs_inode                *ip)
  39 {
  40         struct xfs_mount                *mp = ip->i_mount;
  41         const struct xfs_dir_ops        *dops;
  42         struct xfs_ifork                *ifp;
  43         struct xfs_dir2_sf_hdr          *sfp;
  44         xfs_failaddr_t                  fa;
  45         xfs_ino_t                       old_parent;
  46         bool                            parent_bypass = false;
  47         int                             size;
  48
  49         dops = libxfs_dir_get_ops(mp, NULL);
  50
  51         ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
  52         sfp = (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data;
  53         size = ifp->if_bytes;
  54
  55         /*
  56          * If this is a shortform directory, phase4 may have set the parent
  57          * inode to zero to indicate that it must be fixed.  Temporarily
  58          * set a valid parent so that the directory verifier will pass.
  59          */
  60         if (size > offsetof(struct xfs_dir2_sf_hdr, parent) &&
  61             size >= xfs_dir2_sf_hdr_size(sfp->i8count)) {
  62                 old_parent = dops->sf_get_parent_ino(sfp);
  63                 if (old_parent == 0) {
  64                         dops->sf_put_parent_ino(sfp, mp->m_sb.sb_rootino);
  65                         parent_bypass = true;
  66                 }
  67         }
  68
  69         fa = libxfs_default_ifork_ops.verify_dir(ip);
  70
  71         /* Put it back. */
  72         if (parent_bypass)
  73                 dops->sf_put_parent_ino(sfp, old_parent);
  74
  75         return fa;
  76 }
  77
/*
 * Inode fork verifier table for phase 6: the directory verifier is replaced
 * by phase6_verify_dir(), while the attr and symlink entries point at the
 * xfs_*_shortform_verify functions.  Where this table gets installed is not
 * visible in this part of the file.
 */
static struct xfs_ifork_ops phase6_ifork_ops = {
        .verify_attr    = xfs_attr_shortform_verify,
        .verify_dir     = phase6_verify_dir,
        .verify_symlink = xfs_symlink_shortform_verify,
};
  83
/*
 * Data structures used to keep track of directories where the ".."
 * entries are updated. These must be rebuilt after the initial pass.
 *
 * Each node identifies one directory inode by its AG number, its in-core
 * inode record and the inode's offset within that record.
 */
typedef struct dotdot_update {
        struct list_head        list;           /* link in dotdot_update_list */
        ino_tree_node_t         *irec;          /* in-core inode record */
        xfs_agnumber_t          agno;           /* allocation group number */
        int                     ino_offset;     /* inode offset within irec */
} dotdot_update_t;

/* Directories recorded by add_dotdot_update(). */
static LIST_HEAD(dotdot_update_list);
/* NOTE(review): not touched in this part of the file; presumably a mode flag -- confirm. */
static int                      dotdot_update;
  97
  98 static void
  99 add_dotdot_update(
 100         xfs_agnumber_t          agno,
 101         ino_tree_node_t         *irec,
 102         int                     ino_offset)
 103 {
 104         dotdot_update_t         *dir = malloc(sizeof(dotdot_update_t));
 105
 106         if (!dir)
 107                 do_error(_("malloc failed add_dotdot_update (%zu bytes)\n"),
 108                         sizeof(dotdot_update_t));
 109
 110         INIT_LIST_HEAD(&dir->list);
 111         dir->irec = irec;
 112         dir->agno = agno;
 113         dir->ino_offset = ino_offset;
 114
 115         list_add(&dir->list, &dotdot_update_list);
 116 }
 117
/*
 * Data structures and routines to keep track of directory entries
 * and whether their leaf entry has been seen. Also used for name
 * duplicate checking and the rebuilding step if required.
 *
 * Every entry is linked three ways: into a bucket keyed by its data
 * address, into a bucket keyed by its name hash (non-junk entries only),
 * and into a single list in the order the entries were added.
 */
typedef struct dir_hash_ent {
        struct dir_hash_ent     *nextbyaddr;    /* next in addr bucket */
        struct dir_hash_ent     *nextbyhash;    /* next in name bucket */
        struct dir_hash_ent     *nextbyorder;   /* next in order added */
        xfs_dahash_t            hashval;        /* hash value of name */
        uint32_t                address;        /* offset of data entry */
        xfs_ino_t               inum;           /* inode num of entry */
        short                   junkit;         /* name starts with / or is a duplicate */
        short                   seen;           /* have seen leaf entry */
        struct xfs_name         name;
} dir_hash_ent_t;

typedef struct dir_hash_tab {
        int                     size;           /* size of hash tables */
        int                     names_duped;    /* 1 = ent names malloced */
        dir_hash_ent_t          *first;         /* ptr to first added entry */
        dir_hash_ent_t          *last;          /* ptr to last added entry */
        dir_hash_ent_t          **byhash;       /* ptr to name hash buckets */
        dir_hash_ent_t          **byaddr;       /* ptr to addr hash buckets */
} dir_hash_tab_t;

/* Bytes for a table header followed by two bucket arrays of n pointers each. */
#define DIR_HASH_TAB_SIZE(n)    \
        (sizeof(dir_hash_tab_t) + (sizeof(dir_hash_ent_t *) * (n) * 2))
/* Bucket index for key a in table t. */
#define DIR_HASH_FUNC(t,a)      ((a) % (t)->size)
 147
/*
 * Track the contents of the freespace table in a directory.
 *
 * ents[] is declared with a single element, but FREETAB_SIZE() computes
 * the byte size for n elements, so the structure is presumably allocated
 * with a variable-length tail by its users -- confirm against the callers.
 */
typedef struct freetab {
        int                     naents; /* expected number of data blocks */
        int                     nents;  /* number of data blocks processed */
        struct freetab_ent {
                xfs_dir2_data_off_t     v;
                short                   s;
        } ents[1];
} freetab_t;
/* Bytes needed for a freetab_t that holds n entries. */
#define FREETAB_SIZE(n) \
        (offsetof(freetab_t, ents) + (sizeof(struct freetab_ent) * (n)))
 161
/*
 * Result codes produced by dir_hash_see() / dir_hash_see_all() and turned
 * into messages by dir_hash_check().  DIR_HASH_CK_TOTAL is the number of
 * codes and sizes the message table.
 */
#define DIR_HASH_CK_OK          0
#define DIR_HASH_CK_DUPLEAF     1
#define DIR_HASH_CK_BADHASH     2
#define DIR_HASH_CK_NODATA      3
#define DIR_HASH_CK_NOLEAF      4
#define DIR_HASH_CK_BADSTALE    5
#define DIR_HASH_CK_TOTAL       6
 169
 170 /*
 171  * Need to handle CRC and validation errors specially here. If there is a
 172  * validator error, re-read without the verifier so that we get a buffer we can
 173  * check and repair. Re-attach the ops to the buffer after the read so that when
 174  * it is rewritten the CRC is recalculated.
 175  *
 176  * If the buffer was not read, we return an error. If the buffer was read but
 177  * had a CRC or corruption error, we reread it without the verifier and if it is
 178  * read successfully we increment *crc_error and return 0. Otherwise we
 179  * return the read error.
 180  */
 181 static int
 182 dir_read_buf(
 183         struct xfs_inode        *ip,
 184         xfs_dablk_t             bno,
 185         xfs_daddr_t             mappedbno,
 186         struct xfs_buf          **bpp,
 187         const struct xfs_buf_ops *ops,
 188         int                     *crc_error)
 189 {
 190         int error;
 191         int error2;
 192
 193         error = -libxfs_da_read_buf(NULL, ip, bno, mappedbno, bpp,
 194                                    XFS_DATA_FORK, ops);
 195
 196         if (error != EFSBADCRC && error != EFSCORRUPTED)
 197                 return error;
 198
 199         error2 = -libxfs_da_read_buf(NULL, ip, bno, mappedbno, bpp,
 200                                    XFS_DATA_FORK, NULL);
 201         if (error2)
 202                 return error2;
 203
 204         (*crc_error)++;
 205         (*bpp)->b_ops = ops;
 206         return 0;
 207 }
 208
 209 /*
 210  * Returns 0 if the name already exists (ie. a duplicate)
 211  */
 212 static int
 213 dir_hash_add(
 214         xfs_mount_t             *mp,
 215         dir_hash_tab_t          *hashtab,
 216         uint32_t                addr,
 217         xfs_ino_t               inum,
 218         int                     namelen,
 219         unsigned char           *name,
 220         uint8_t                 ftype)
 221 {
 222         xfs_dahash_t            hash = 0;
 223         int                     byaddr;
 224         int                     byhash = 0;
 225         dir_hash_ent_t          *p;
 226         int                     dup;
 227         short                   junk;
 228         struct xfs_name         xname;
 229
 230         ASSERT(!hashtab->names_duped);
 231
 232         xname.name = name;
 233         xname.len = namelen;
 234         xname.type = ftype;
 235
 236         junk = name[0] == '/';
 237         byaddr = DIR_HASH_FUNC(hashtab, addr);
 238         dup = 0;
 239
 240         if (!junk) {
 241                 hash = mp->m_dirnameops->hashname(&xname);
 242                 byhash = DIR_HASH_FUNC(hashtab, hash);
 243
 244                 /*
 245                  * search hash bucket for existing name.
 246                  */
 247                 for (p = hashtab->byhash[byhash]; p; p = p->nextbyhash) {
 248                         if (p->hashval == hash && p->name.len == namelen) {
 249                                 if (memcmp(p->name.name, name, namelen) == 0) {
 250                                         dup = 1;
 251                                         junk = 1;
 252                                         break;
 253                                 }
 254                         }
 255                 }
 256         }
 257
 258         if ((p = malloc(sizeof(*p))) == NULL)
 259                 do_error(_("malloc failed in dir_hash_add (%zu bytes)\n"),
 260                         sizeof(*p));
 261
 262         p->nextbyaddr = hashtab->byaddr[byaddr];
 263         hashtab->byaddr[byaddr] = p;
 264         if (hashtab->last)
 265                 hashtab->last->nextbyorder = p;
 266         else
 267                 hashtab->first = p;
 268         p->nextbyorder = NULL;
 269         hashtab->last = p;
 270
 271         if (!(p->junkit = junk)) {
 272                 p->hashval = hash;
 273                 p->nextbyhash = hashtab->byhash[byhash];
 274                 hashtab->byhash[byhash] = p;
 275         }
 276         p->address = addr;
 277         p->inum = inum;
 278         p->seen = 0;
 279         p->name = xname;
 280
 281         return !dup;
 282 }
 283
 284 /*
 285  * checks to see if any data entries are not in the leaf blocks
 286  */
 287 static int
 288 dir_hash_unseen(
 289         dir_hash_tab_t  *hashtab)
 290 {
 291         int             i;
 292         dir_hash_ent_t  *p;
 293
 294         for (i = 0; i < hashtab->size; i++) {
 295                 for (p = hashtab->byaddr[i]; p; p = p->nextbyaddr) {
 296                         if (p->seen == 0)
 297                                 return 1;
 298                 }
 299         }
 300         return 0;
 301 }
 302
 303 static int
 304 dir_hash_check(
 305         dir_hash_tab_t  *hashtab,
 306         xfs_inode_t     *ip,
 307         int             seeval)
 308 {
 309         static char     *seevalstr[DIR_HASH_CK_TOTAL];
 310         static int      done;
 311
 312         if (!done) {
 313                 seevalstr[DIR_HASH_CK_OK] = _("ok");
 314                 seevalstr[DIR_HASH_CK_DUPLEAF] = _("duplicate leaf");
 315                 seevalstr[DIR_HASH_CK_BADHASH] = _("hash value mismatch");
 316                 seevalstr[DIR_HASH_CK_NODATA] = _("no data entry");
 317                 seevalstr[DIR_HASH_CK_NOLEAF] = _("no leaf entry");
 318                 seevalstr[DIR_HASH_CK_BADSTALE] = _("bad stale count");
 319                 done = 1;
 320         }
 321
 322         if (seeval == DIR_HASH_CK_OK && dir_hash_unseen(hashtab))
 323                 seeval = DIR_HASH_CK_NOLEAF;
 324         if (seeval == DIR_HASH_CK_OK)
 325                 return 0;
 326         do_warn(_("bad hash table for directory inode %" PRIu64 " (%s): "),
 327                 ip->i_ino, seevalstr[seeval]);
 328         if (!no_modify)
 329                 do_warn(_("rebuilding\n"));
 330         else
 331                 do_warn(_("would rebuild\n"));
 332         return 1;
 333 }
 334
 335 static void
 336 dir_hash_done(
 337         dir_hash_tab_t  *hashtab)
 338 {
 339         int             i;
 340         dir_hash_ent_t  *n;
 341         dir_hash_ent_t  *p;
 342
 343         for (i = 0; i < hashtab->size; i++) {
 344                 for (p = hashtab->byaddr[i]; p; p = n) {
 345                         n = p->nextbyaddr;
 346                         if (hashtab->names_duped)
 347                                 free((void *)p->name.name);
 348                         free(p);
 349                 }
 350         }
 351         free(hashtab);
 352 }
 353
 354 static dir_hash_tab_t *
 355 dir_hash_init(
 356         xfs_fsize_t     size)
 357 {
 358         dir_hash_tab_t  *hashtab;
 359         int             hsize;
 360
 361         hsize = size / (16 * 4);
 362         if (hsize > 65536)
 363                 hsize = 63336;
 364         else if (hsize < 16)
 365                 hsize = 16;
 366         if ((hashtab = calloc(DIR_HASH_TAB_SIZE(hsize), 1)) == NULL)
 367                 do_error(_("calloc failed in dir_hash_init\n"));
 368         hashtab->size = hsize;
 369         hashtab->byhash = (dir_hash_ent_t**)((char *)hashtab +
 370                 sizeof(dir_hash_tab_t));
 371         hashtab->byaddr = (dir_hash_ent_t**)((char *)hashtab +
 372                 sizeof(dir_hash_tab_t) + sizeof(dir_hash_ent_t*) * hsize);
 373         return hashtab;
 374 }
 375
 376 static int
 377 dir_hash_see(
 378         dir_hash_tab_t          *hashtab,
 379         xfs_dahash_t            hash,
 380         xfs_dir2_dataptr_t      addr)
 381 {
 382         int                     i;
 383         dir_hash_ent_t          *p;
 384
 385         i = DIR_HASH_FUNC(hashtab, addr);
 386         for (p = hashtab->byaddr[i]; p; p = p->nextbyaddr) {
 387                 if (p->address != addr)
 388                         continue;
 389                 if (p->seen)
 390                         return DIR_HASH_CK_DUPLEAF;
 391                 if (p->junkit == 0 && p->hashval != hash)
 392                         return DIR_HASH_CK_BADHASH;
 393                 p->seen = 1;
 394                 return DIR_HASH_CK_OK;
 395         }
 396         return DIR_HASH_CK_NODATA;
 397 }
 398
 399 static void
 400 dir_hash_update_ftype(
 401         dir_hash_tab_t          *hashtab,
 402         xfs_dir2_dataptr_t      addr,
 403         uint8_t                 ftype)
 404 {
 405         int                     i;
 406         dir_hash_ent_t          *p;
 407
 408         i = DIR_HASH_FUNC(hashtab, addr);
 409         for (p = hashtab->byaddr[i]; p; p = p->nextbyaddr) {
 410                 if (p->address != addr)
 411                         continue;
 412                 p->name.type = ftype;
 413         }
 414 }
 415
 416 /*
 417  * checks to make sure leafs match a data entry, and that the stale
 418  * count is valid.
 419  */
 420 static int
 421 dir_hash_see_all(
 422         dir_hash_tab_t          *hashtab,
 423         xfs_dir2_leaf_entry_t   *ents,
 424         int                     count,
 425         int                     stale)
 426 {
 427         int                     i;
 428         int                     j;
 429         int                     rval;
 430
 431         for (i = j = 0; i < count; i++) {
 432                 if (be32_to_cpu(ents[i].address) == XFS_DIR2_NULL_DATAPTR) {
 433                         j++;
 434                         continue;
 435                 }
 436                 rval = dir_hash_see(hashtab, be32_to_cpu(ents[i].hashval),
 437                                         be32_to_cpu(ents[i].address));
 438                 if (rval != DIR_HASH_CK_OK)
 439                         return rval;
 440         }
 441         return j == stale ? DIR_HASH_CK_OK : DIR_HASH_CK_BADSTALE;
 442 }
 443
 444 /*
 445  * Convert name pointers into locally allocated memory.
 446  * This must only be done after all the entries have been added.
 447  */
 448 static void
 449 dir_hash_dup_names(dir_hash_tab_t *hashtab)
 450 {
 451         unsigned char           *name;
 452         dir_hash_ent_t          *p;
 453
 454         if (hashtab->names_duped)
 455                 return;
 456
 457         for (p = hashtab->first; p; p = p->nextbyorder) {
 458                 name = malloc(p->name.len);
 459                 memcpy(name, p->name.name, p->name.len);
 460                 p->name.name = name;
 461         }
 462         hashtab->names_duped = 1;
 463 }
 464
 465 /*
 466  * Given a block number in a fork, return the next valid block number
 467  * (not a hole).
 468  * If this is the last block number then NULLFILEOFF is returned.
 469  *
 470  * This was originally in the kernel, but only used in xfs_repair.
 471  */
 472 static int
 473 bmap_next_offset(
 474         xfs_trans_t     *tp,                    /* transaction pointer */
 475         xfs_inode_t     *ip,                    /* incore inode */
 476         xfs_fileoff_t   *bnop,                  /* current block */
 477         int             whichfork)              /* data or attr fork */
 478 {
 479         xfs_fileoff_t   bno;                    /* current block */
 480         int             error;                  /* error return value */
 481         xfs_bmbt_irec_t got;                    /* current extent value */
 482         xfs_ifork_t     *ifp;                   /* inode fork pointer */
 483         struct xfs_iext_cursor  icur;
 484
 485         if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
 486             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
 487             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
 488                return EIO;
 489         if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
 490                 *bnop = NULLFILEOFF;
 491                 return 0;
 492         }
 493         ifp = XFS_IFORK_PTR(ip, whichfork);
 494         if (!(ifp->if_flags & XFS_IFEXTENTS) &&
 495             (error = -libxfs_iread_extents(tp, ip, whichfork)))
 496                 return error;
 497         bno = *bnop + 1;
 498         if (!libxfs_iext_lookup_extent(ip, ifp, bno, &icur, &got))
 499                 *bnop = NULLFILEOFF;
 500         else
 501                 *bnop = got.br_startoff < bno ? bno : got.br_startoff;
 502         return 0;
 503 }
 504
 505
 506 static void
 507 res_failed(
 508         int     err)
 509 {
 510         if (err == ENOSPC) {
 511                 do_error(_("ran out of disk space!\n"));
 512         } else
 513                 do_error(_("xfs_trans_reserve returned %d\n"), err);
 514 }
 515
/*
 * Recreate the realtime bitmap inode (mp->m_sb.sb_rbmino).
 *
 * Works in two transactions.  The first one resets the in-core inode to a
 * regular file in extents format with link count 1 and a size of
 * sb_rbmblocks * sb_blocksize, and logs the inode core.  The second one
 * maps sb_rbmblocks blocks into the data fork and zeroes every mapped
 * extent with libxfs_device_zero().
 *
 * Reservation failures go through res_failed(); iget, mapping and
 * deferred-op failures go through do_error().
 */
void
mk_rbmino(xfs_mount_t *mp)
{
        xfs_trans_t     *tp;
        xfs_inode_t     *ip;
        xfs_bmbt_irec_t *ep;
        xfs_fsblock_t   first;
        int             i;
        int             nmap;
        int             error;
        struct xfs_defer_ops    dfops;
        xfs_fileoff_t   bno;
        xfs_bmbt_irec_t map[XFS_BMAP_MAX_NMAP];
        int             vers;
        int             times;
        uint            blocks;

        /*
         * first set up inode
         */
        i = -libxfs_trans_alloc_rollable(mp, 10, &tp);
        if (i)
                res_failed(i);

        error = -libxfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, 0, &ip);
        if (error) {
                do_error(
                _("couldn't iget realtime bitmap inode -- error - %d\n"),
                        error);
        }

        /* inode version 3 on CRC-enabled filesystems, version 2 otherwise */
        vers = xfs_sb_version_hascrc(&mp->m_sb) ? 3 : 2;
        /* wipe the whole in-core inode core before filling it in again */
        memset(&ip->i_d, 0, sizeof(ip->i_d));

        VFS_I(ip)->i_mode = S_IFREG;
        ip->i_d.di_version = vers;
        ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
        ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;

        set_nlink(VFS_I(ip), 1);        /* account for sb ptr */

        times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
        if (ip->i_d.di_version == 3) {
                /* version 3 inodes also get a change count and a create time */
                VFS_I(ip)->i_version = 1;
                ip->i_d.di_flags2 = 0;
                times |= XFS_ICHGTIME_CREATE;
        }
        libxfs_trans_ichgtime(tp, ip, times);

        /*
         * now the ifork: start with an empty data fork
         */
        ip->i_df.if_flags = XFS_IFEXTENTS;
        ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
        ip->i_df.if_u1.if_root = NULL;

        ip->i_d.di_size = mp->m_sb.sb_rbmblocks * mp->m_sb.sb_blocksize;

        /*
         * commit changes
         */
        libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
        libxfs_trans_commit(tp);

        /*
         * then allocate blocks for file and fill with zeroes (stolen
         * from mkfs).  The reservation covers the bitmap blocks plus
         * XFS_BM_MAXLEVELS - 1 more (presumably for bmap btree blocks --
         * confirm).
         */
        blocks = mp->m_sb.sb_rbmblocks +
                        XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1;
        error = -libxfs_trans_alloc_rollable(mp, blocks, &tp);
        if (error)
                res_failed(error);

        libxfs_trans_ijoin(tp, ip, 0);
        bno = 0;
        libxfs_defer_init(tp, &dfops, &first);
        /* keep mapping until the whole file range has been covered */
        while (bno < mp->m_sb.sb_rbmblocks) {
                nmap = XFS_BMAP_MAX_NMAP;
                error = -libxfs_bmapi_write(tp, ip, bno,
                          (xfs_extlen_t)(mp->m_sb.sb_rbmblocks - bno),
                          0, &first, mp->m_sb.sb_rbmblocks, map, &nmap);
                if (error) {
                        do_error(
                        _("couldn't allocate realtime bitmap, error = %d\n"),
                                error);
                }
                /* zero each returned extent and advance past it */
                for (i = 0, ep = map; i < nmap; i++, ep++) {
                        libxfs_device_zero(mp->m_ddev_targp,
                                XFS_FSB_TO_DADDR(mp, ep->br_startblock),
                                XFS_FSB_TO_BB(mp, ep->br_blockcount));
                        bno += ep->br_blockcount;
                }
        }
        libxfs_defer_ijoin(&dfops, ip);
        error = -libxfs_defer_finish(&tp, &dfops);
        if (error) {
                do_error(
                _("allocation of the realtime bitmap failed, error = %d\n"),
                        error);
        }
        libxfs_trans_commit(tp);
        IRELE(ip);
}
 620
 621 static int
 622 fill_rbmino(xfs_mount_t *mp)
 623 {
 624         xfs_buf_t       *bp;
 625         xfs_trans_t     *tp;
 626         xfs_inode_t     *ip;
 627         xfs_rtword_t    *bmp;
 628         xfs_fsblock_t   first;
 629         int             nmap;
 630         int             error;
 631         xfs_fileoff_t   bno;
 632         xfs_bmbt_irec_t map;
 633
 634         bmp = btmcompute;
 635         bno = 0;
 636
 637         error = -libxfs_trans_alloc_rollable(mp, 10, &tp);
 638         if (error)
 639                 res_failed(error);
 640
 641         error = -libxfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, 0, &ip);
 642         if (error) {
 643                 do_error(
 644                 _("couldn't iget realtime bitmap inode -- error - %d\n"),
 645                         error);
 646         }
 647
 648         first = NULLFSBLOCK;
 649         while (bno < mp->m_sb.sb_rbmblocks)  {
 650                 /*
 651                  * fill the file one block at a time
 652                  */
 653                 nmap = 1;
 654                 error = -libxfs_bmapi_write(tp, ip, bno, 1, 0,
 655                                         &first, 1, &map, &nmap);
 656                 if (error || nmap != 1) {
 657                         do_error(
 658         _("couldn't map realtime bitmap block %" PRIu64 ", error = %d\n"),
 659                                 bno, error);
 660                 }
 661
 662                 ASSERT(map.br_startblock != HOLESTARTBLOCK);
 663
 664                 error = -libxfs_trans_read_buf(
 665                                 mp, tp, mp->m_dev,
 666                                 XFS_FSB_TO_DADDR(mp, map.br_startblock),
 667                                 XFS_FSB_TO_BB(mp, 1), 1, &bp, NULL);
 668
 669                 if (error) {
 670                         do_warn(
 671 _("can't access block %" PRIu64 " (fsbno %" PRIu64 ") of realtime bitmap inode %" PRIu64 "\n"),
 672                                 bno, map.br_startblock, mp->m_sb.sb_rbmino);
 673                         return(1);
 674                 }
 675
 676                 memmove(bp->b_addr, bmp, mp->m_sb.sb_blocksize);
 677
 678                 libxfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
 679
 680                 bmp = (xfs_rtword_t *)((intptr_t) bmp + mp->m_sb.sb_blocksize);
 681                 bno++;
 682         }
 683
 684         libxfs_trans_commit(tp);
 685         IRELE(ip);
 686         return(0);
 687 }
 688
 689 static int
 690 fill_rsumino(xfs_mount_t *mp)
 691 {
 692         xfs_buf_t       *bp;
 693         xfs_trans_t     *tp;
 694         xfs_inode_t     *ip;
 695         xfs_suminfo_t   *smp;
 696         xfs_fsblock_t   first;
 697         int             nmap;
 698         int             error;
 699         xfs_fileoff_t   bno;
 700         xfs_fileoff_t   end_bno;
 701         xfs_bmbt_irec_t map;
 702
 703         smp = sumcompute;
 704         bno = 0;
 705         end_bno = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
 706
 707         error = -libxfs_trans_alloc_rollable(mp, 10, &tp);
 708         if (error)
 709                 res_failed(error);
 710
 711         error = -libxfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, 0, &ip);
 712         if (error) {
 713                 do_error(
 714                 _("couldn't iget realtime summary inode -- error - %d\n"),
 715                         error);
 716         }
 717
 718         first = NULLFSBLOCK;
 719         while (bno < end_bno)  {
 720                 /*
 721                  * fill the file one block at a time
 722                  */
 723                 nmap = 1;
 724                 error = -libxfs_bmapi_write(tp, ip, bno, 1, 0,
 725                                         &first, 1, &map, &nmap);
 726                 if (error || nmap != 1) {
 727                         do_error(
 728         _("couldn't map realtime summary inode block %" PRIu64 ", error = %d\n"),
 729                                 bno, error);
 730                 }
 731
 732                 ASSERT(map.br_startblock != HOLESTARTBLOCK);
 733
 734                 error = -libxfs_trans_read_buf(
 735                                 mp, tp, mp->m_dev,
 736                                 XFS_FSB_TO_DADDR(mp, map.br_startblock),
 737                                 XFS_FSB_TO_BB(mp, 1), 1, &bp, NULL);
 738
 739                 if (error) {
 740                         do_warn(
 741 _("can't access block %" PRIu64 " (fsbno %" PRIu64 ") of realtime summary inode %" PRIu64 "\n"),
 742                                 bno, map.br_startblock, mp->m_sb.sb_rsumino);
 743                         IRELE(ip);
 744                         return(1);
 745                 }
 746
 747                 memmove(bp->b_addr, smp, mp->m_sb.sb_blocksize);
 748
 749                 libxfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
 750
 751                 smp = (xfs_suminfo_t *)((intptr_t)smp + mp->m_sb.sb_blocksize);
 752                 bno++;
 753         }
 754
 755         libxfs_trans_commit(tp);
 756         IRELE(ip);
 757         return(0);
 758 }
 759
/*
 * Recreate the realtime summary inode (mp->m_sb.sb_rsumino).
 *
 * Works in two transactions.  The first one resets the in-core inode to a
 * regular file in extents format with link count 1 and a size of
 * mp->m_rsumsize, and logs the inode core.  The second one maps
 * m_rsumsize >> sb_blocklog blocks into the data fork and zeroes every
 * mapped extent with libxfs_device_zero().
 *
 * Reservation failures go through res_failed(); iget, mapping and
 * deferred-op failures go through do_error().
 */
static void
mk_rsumino(xfs_mount_t *mp)
{
        xfs_trans_t     *tp;
        xfs_inode_t     *ip;
        xfs_bmbt_irec_t *ep;
        xfs_fsblock_t   first;
        int             i;
        int             nmap;
        int             error;
        int             nsumblocks;
        struct xfs_defer_ops    dfops;
        xfs_fileoff_t   bno;
        xfs_bmbt_irec_t map[XFS_BMAP_MAX_NMAP];
        int             vers;
        int             times;
        uint            blocks;

        /*
         * first set up inode
         */
        i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 10, 0, 0, &tp);
        if (i)
                res_failed(i);

        error = -libxfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, 0, &ip);
        if (error) {
                do_error(
                _("couldn't iget realtime summary inode -- error - %d\n"),
                        error);
        }

        /* inode version 3 on CRC-enabled filesystems, version 2 otherwise */
        vers = xfs_sb_version_hascrc(&mp->m_sb) ? 3 : 2;
        /* wipe the whole in-core inode core before filling it in again */
        memset(&ip->i_d, 0, sizeof(ip->i_d));

        VFS_I(ip)->i_mode = S_IFREG;
        ip->i_d.di_version = vers;
        ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
        ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;

        set_nlink(VFS_I(ip), 1);        /* account for sb ptr */

        times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
        if (ip->i_d.di_version == 3) {
                /* version 3 inodes also get a change count and a create time */
                VFS_I(ip)->i_version = 1;
                ip->i_d.di_flags2 = 0;
                times |= XFS_ICHGTIME_CREATE;
        }
        libxfs_trans_ichgtime(tp, ip, times);

        /*
         * now the ifork: start with an empty data fork
         */
        ip->i_df.if_flags = XFS_IFEXTENTS;
        ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
        ip->i_df.if_u1.if_root = NULL;

        ip->i_d.di_size = mp->m_rsumsize;

        /*
         * commit changes
         */
        libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
        libxfs_trans_commit(tp);

        /*
         * then allocate blocks for file and fill with zeroes (stolen
         * from mkfs).  The reservation covers the summary blocks plus
         * XFS_BM_MAXLEVELS - 1 more (presumably for bmap btree blocks --
         * confirm).
         */
        nsumblocks = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
        blocks = nsumblocks + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1;
        error = -libxfs_trans_alloc_rollable(mp, blocks, &tp);
        if (error)
                res_failed(error);

        libxfs_trans_ijoin(tp, ip, 0);
        bno = 0;
        libxfs_defer_init(tp, &dfops, &first);
        /* keep mapping until the whole file range has been covered */
        while (bno < nsumblocks) {
                nmap = XFS_BMAP_MAX_NMAP;
                error = -libxfs_bmapi_write(tp, ip, bno,
                          (xfs_extlen_t)(nsumblocks - bno),
                          0, &first, nsumblocks, map, &nmap);
                if (error) {
                        do_error(
                _("couldn't allocate realtime summary inode, error = %d\n"),
                                error);
                }
                /* zero each returned extent and advance past it */
                for (i = 0, ep = map; i < nmap; i++, ep++) {
                        libxfs_device_zero(mp->m_ddev_targp,
                                      XFS_FSB_TO_DADDR(mp, ep->br_startblock),
                                      XFS_FSB_TO_BB(mp, ep->br_blockcount));
                        bno += ep->br_blockcount;
                }
        }
        libxfs_defer_ijoin(&dfops, ip);
        error = -libxfs_defer_finish(&tp, &dfops);
        if (error) {
                do_error(
        _("allocation of the realtime summary ino failed, error = %d\n"),
                        error);
        }
        libxfs_trans_commit(tp);
        IRELE(ip);
}
 865
 866 /*
 867  * makes a new root directory.
      *
      * Re-initializes the inode numbered mp->m_sb.sb_rootino in place as an
      * empty directory: the incore dinode core is zeroed and refilled (mode
      * S_IFDIR|0755, extents format, link count 1), the data fork is reset
      * to an empty extent list, and libxfs_dir_init() is run with the inode
      * as its own parent.  Everything happens in a single tr_ichange
      * transaction.  Finally the root inode is flagged as a directory in the
      * incore inode tree.
      *
      * Failure to reserve the transaction goes to res_failed(); failure to
      * get the inode goes to do_error().
 868  */
 869 static void
 870 mk_root_dir(xfs_mount_t *mp)
 871 {
 872         xfs_trans_t     *tp;
 873         xfs_inode_t     *ip;
 874         int             i;
 875         int             error;
 876         const mode_t    mode = 0755;
 877         ino_tree_node_t *irec;
 878         int             vers;
 879         int             times;
 880
 881         ip = NULL;
             /* reserve a tr_ichange transaction with 10 blocks */
 882         i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 10, 0, 0, &tp);
 883         if (i)
 884                 res_failed(i);
 885
 886         error = -libxfs_trans_iget(mp, tp, mp->m_sb.sb_rootino, 0, 0, &ip);
 887         if (error) {
 888                 do_error(_("could not iget root inode -- error - %d\n"), error);
 889         }
 890
 891         /*
 892          * take care of the core -- initialization from xfs_ialloc()
 893          */
             /* inode version follows the superblock: 3 with CRCs, else 2 */
 894         vers = xfs_sb_version_hascrc(&mp->m_sb) ? 3 : 2;
 895         memset(&ip->i_d, 0, sizeof(ip->i_d));
 896
 897         VFS_I(ip)->i_mode = mode|S_IFDIR;
 898         ip->i_d.di_version = vers;
 899         ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
 900         ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
 901
 902         set_nlink(VFS_I(ip), 1);        /* account for . */
 903
             /* v3 inodes additionally get i_version, flags2 and a create time */
 904         times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
 905         if (ip->i_d.di_version == 3) {
 906                 VFS_I(ip)->i_version = 1;
 907                 ip->i_d.di_flags2 = 0;
 908                 times |= XFS_ICHGTIME_CREATE;
 909         }
 910         libxfs_trans_ichgtime(tp, ip, times);
 911
 912         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 913
 914         /*
 915          * now the ifork
              * (reset the data fork to an empty, extents-format fork)
 916          */
 917         ip->i_df.if_flags = XFS_IFEXTENTS;
 918         ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
 919         ip->i_df.if_u1.if_root = NULL;
 920
 921
 922
 923         /*
 924          * initialize the directory
              * (ip is passed as both the directory and its parent)
              *
              * NOTE(review): the return values of libxfs_dir_init() and
              * libxfs_trans_commit() are not checked here -- confirm that
              * is intended.
 925          */
 926         ip->d_ops = mp->m_dir_inode_ops;
 927         libxfs_dir_init(tp, ip, ip);
 928
 929         libxfs_trans_commit(tp);
 930         IRELE(ip);
 931
             /*
              * flag the root inode as a directory in its incore inode record;
              * the offset is the distance of its agino from the record's first
              * inode.
              * NOTE(review): irec is dereferenced without a NULL check --
              * presumably the root inode always has a record by now; confirm.
              */
 932         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
 933                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
 934         set_inode_isadir(irec, XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino) -
 935                                 irec->ino_startnum);
 936 }
 937
 938 /*
 939  * orphanage name == lost+found
      *
      * Return the inode number of the ORPHANAGE directory in the root
      * directory, creating the directory first if the lookup does not find
      * one.
      *
      * When it has to be created, a new directory inode is allocated with
      * the root inode as parent, marked used in the incore inode tree (a
      * fresh inode record is added if the inode has none yet), entered into
      * the root directory and initialized, all inside one tr_mkdir
      * transaction.  Failures are reported through do_error()/res_failed().
 940  */
 941 static xfs_ino_t
 942 mk_orphanage(xfs_mount_t *mp)
 943 {
 944         xfs_ino_t       ino;
 945         xfs_trans_t     *tp;
 946         xfs_inode_t     *ip;
 947         xfs_inode_t     *pip;
 948         xfs_fsblock_t   first;
 949         ino_tree_node_t *irec;
 950         int             ino_offset = 0;
 951         int             i;
 952         int             error;
 953         struct xfs_defer_ops    dfops;
 954         const int       mode = 0755;
 955         int             nres;
 956         struct xfs_name xname;
 957
 958         /*
 959          * check for an existing lost+found first, if it exists, return
 960          * its inode. Otherwise, we can create it. Bad lost+found inodes
 961          * would have been cleared in phase3 and phase4.
 962          */
 963
 964         i = -libxfs_iget(mp, NULL, mp->m_sb.sb_rootino, 0, &pip,
 965                         &xfs_default_ifork_ops);
 966         if (i)
 967                 do_error(_("%d - couldn't iget root inode to obtain %s\n"),
 968                         i, ORPHANAGE);
 969
 970         xname.name = (unsigned char *)ORPHANAGE;
 971         xname.len = strlen(ORPHANAGE);
 972         xname.type = XFS_DIR3_FT_DIR;
 973
 974         if (libxfs_dir_lookup(NULL, pip, &xname, &ino, NULL) == 0) {
                     /* drop the root inode reference taken by iget above */
                     IRELE(pip);
 975                 return ino;
             }
 976
 977         /*
 978          * could not be found, create it
 979          */
 980         nres = XFS_MKDIR_SPACE_RES(mp, xname.len);
 981         i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir, nres, 0, 0, &tp);
 982         if (i)
 983                 res_failed(i);
 984         libxfs_defer_init(tp, &dfops, &first);
 985
 986         /*
 987          * the root inode was obtained with iget above and is only
 988          * ijoin'ed further down instead of using trans_iget, because
 989          * the ialloc wrapper can commit the transaction and start a
              * new one
              */
 995
 996         error = -libxfs_inode_alloc(&tp, pip, mode|S_IFDIR,
 997                                         1, 0, &zerocr, &zerofsx, &ip);
 998         if (error) {
 999                 do_error(_("%s inode allocation failed %d\n"),
1000                         ORPHANAGE, error);
1001         }
1002         inc_nlink(VFS_I(ip));           /* account for . */
1003         ino = ip->i_ino;
1004
1005         irec = find_inode_rec(mp,
1006                         XFS_INO_TO_AGNO(mp, ino),
1007                         XFS_INO_TO_AGINO(mp, ino));
1008
1009         if (irec == NULL) {
1010                 /*
1011                  * This inode is allocated from a newly created inode
1012                  * chunk and therefore did not exist when inode chunks
1013                  * were processed in phase3. Add this group of inodes to
1014                  * the entry avl tree as if they were discovered in phase3.
1015                  */
1016                 irec = set_inode_free_alloc(mp, XFS_INO_TO_AGNO(mp, ino),
1017                                             XFS_INO_TO_AGINO(mp, ino));
1018                 alloc_ex_data(irec);
1019
1020                 for (i = 0; i < XFS_INODES_PER_CHUNK; i++)
1021                         set_inode_free(irec, i);
1022         }
1023
1024         ino_offset = get_inode_offset(mp, ino, irec);
1025
1026         /*
1027          * Mark the inode allocated to lost+found as used in the AVL tree
1028          * so it is not skipped in phase 7
1029          */
1030         set_inode_used(irec, ino_offset);
1031         add_inode_ref(irec, ino_offset);
1032
1033         /*
1034          * now that we know the transaction will stay around,
1035          * add the root inode to it
1036          */
1037         libxfs_trans_ijoin(tp, pip, 0);
1038
1039         /*
1040          * create the actual entry
1041          */
1042         error = -libxfs_dir_createname(tp, pip, &xname, ip->i_ino, nres);
1043         if (error)
1044                 do_error(
1045                 _("can't make %s, createname error %d\n"),
1046                         ORPHANAGE, error);
1047
1048         /*
1049          * bump up the link count in the root directory to account
1050          * for .. in the new directory
              *
              * NOTE(review): the incore reference is added at offset 0 of
              * the root inode's record -- presumably the root inode is the
              * first inode of its chunk; confirm.
1051          */
1052         inc_nlink(VFS_I(pip));
1053         add_inode_ref(find_inode_rec(mp,
1054                                 XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
1055                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino)), 0);
1056
1057
1058
             /* log the root inode, then set up the new directory under it */
1059         libxfs_trans_log_inode(tp, pip, XFS_ILOG_CORE);
1060         libxfs_dir_init(tp, ip, pip);
1061         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1062
1063         libxfs_defer_ijoin(&dfops, ip);
1064         error = -libxfs_defer_finish(&tp, &dfops);
1065         if (error) {
1066                 do_error(_("%s directory creation failed -- bmapf error %d\n"),
1067                         ORPHANAGE, error);
1068         }
1069
1070
1071         libxfs_trans_commit(tp);
1072         IRELE(ip);
1073         IRELE(pip);
1074         add_inode_reached(irec,ino_offset);
1075
1076         return(ino);
1077 }
1078
1079 /*
1080  * move a file to the orphanage.
      *
      * Links inode @ino into the directory orphanage_ino under a name built
      * from its inode number: "<ino>", or "<ino>.<n>" for the first n >= 1
      * whose lookup in the orphanage fails.
      *
      * For a directory (@isa_dir) the ".." entry of the orphan is created
      * if the lookup for it fails, or replaced if it exists and does not
      * already point at the orphanage.  For anything else only the
      * orphanage entry is created and the link count of the orphan is set
      * to 1.  Each case runs in its own transaction; failures are reported
      * through do_error().
1081  */
1082 static void
1083 mv_orphanage(
1084         xfs_mount_t             *mp,
1085         xfs_ino_t               ino,            /* inode # to be moved */
1086         int                     isa_dir)        /* 1 if inode is a directory */
1087 {
1088         xfs_inode_t             *orphanage_ip;
1089         xfs_ino_t               entry_ino_num;
1090         xfs_inode_t             *ino_p;
1091         xfs_trans_t             *tp;
1092         xfs_fsblock_t           first;
1093         struct xfs_defer_ops            dfops;
1094         int                     err;
1095         unsigned char           fname[MAXPATHLEN + 1];
1096         int                     nres;
1097         int                     incr;
1098         ino_tree_node_t         *irec;
1099         int                     ino_offset = 0;
1100         struct xfs_name         xname;
1101
1102         xname.name = fname;
1103         xname.len = snprintf((char *)fname, sizeof(fname), "%llu",
1104                                 (unsigned long long)ino);
1105
1106         err = -libxfs_iget(mp, NULL, orphanage_ino, 0, &orphanage_ip,
1107                         &xfs_default_ifork_ops);
1108         if (err)
1109                 do_error(_("%d - couldn't iget orphanage inode\n"), err);
1110         /*
1111          * Make sure the filename is unique in the lost+found
1112          */
1113         incr = 0;
1114         while (libxfs_dir_lookup(NULL, orphanage_ip, &xname, &entry_ino_num,
1115                                                                 NULL) == 0)
1116                 xname.len = snprintf((char *)fname, sizeof(fname), "%llu.%d",
1117                                         (unsigned long long)ino, ++incr);
1118
1119         /* Orphans may not have a proper parent, so use custom ops here */
1120         err = -libxfs_iget(mp, NULL, ino, 0, &ino_p, &phase6_ifork_ops);
1121         if (err)
1122                 do_error(_("%d - couldn't iget disconnected inode\n"), err);
1123
1124         xname.type = libxfs_mode_to_ftype(VFS_I(ino_p)->i_mode);
1125
1126         if (isa_dir)  {
                     /*
                      * If the orphanage has an incore inode record, the new
                      * reference is recorded there with add_inode_ref();
                      * otherwise its link count is bumped directly below.
                      */
1127                 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, orphanage_ino),
1128                                 XFS_INO_TO_AGINO(mp, orphanage_ino));
1129                 if (irec)
1130                         ino_offset = XFS_INO_TO_AGINO(mp, orphanage_ino) -
1131                                         irec->ino_startnum;
                     /*
                      * reserve for the new orphanage entry plus the ".."
                      * entry (name length 2) of the orphan
                      */
1132                 nres = XFS_DIRENTER_SPACE_RES(mp, xname.len) +
1133                        XFS_DIRENTER_SPACE_RES(mp, 2);
1134                 err = -libxfs_dir_lookup(NULL, ino_p, &xfs_name_dotdot,
1135                                         &entry_ino_num, NULL);
1136                 if (err) {
                             /* no ".." entry at all: create one */
1137                         ASSERT(err == ENOENT);
1138
1139                         err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_rename,
1140                                                   nres, 0, 0, &tp);
1141                         if (err)
1142                                 do_error(
1143         _("space reservation failed (%d), filesystem may be out of space\n"),
1144                                         err);
1145
1146                         libxfs_trans_ijoin(tp, orphanage_ip, 0);
1147                         libxfs_trans_ijoin(tp, ino_p, 0);
1148
1149                         libxfs_defer_init(tp, &dfops, &first);
1150                         err = -libxfs_dir_createname(tp, orphanage_ip, &xname,
1151                                                 ino, nres);
1152                         if (err)
1153                                 do_error(
1154         _("name create failed in %s (%d), filesystem may be out of space\n"),
1155                                         ORPHANAGE, err);
1156
1157                         if (irec)
1158                                 add_inode_ref(irec, ino_offset);
1159                         else
1160                                 inc_nlink(VFS_I(orphanage_ip));
1161                         libxfs_trans_log_inode(tp, orphanage_ip, XFS_ILOG_CORE);
1162
1163                         err = -libxfs_dir_createname(tp, ino_p, &xfs_name_dotdot,
1164                                         orphanage_ino, nres);
1165                         if (err)
1166                                 do_error(
1167         _("creation of .. entry failed (%d), filesystem may be out of space\n"),
1168                                         err);
1169
1170                         inc_nlink(VFS_I(ino_p));
1171                         libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE);
1172
1173                         libxfs_defer_ijoin(&dfops, ino_p);
1174                         err = -libxfs_defer_finish(&tp, &dfops);
1175                         if (err)
1176                                 do_error(
1177         _("bmap finish failed (err - %d), filesystem may be out of space\n"),
1178                                         err);
1179
1180                         libxfs_trans_commit(tp);
1181                 } else  {
                             /* ".." exists: repoint it at the orphanage */
1182                         err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_rename,
1183                                                   nres, 0, 0, &tp);
1184                         if (err)
1185                                 do_error(
1186         _("space reservation failed (%d), filesystem may be out of space\n"),
1187                                         err);
1188
1189                         libxfs_trans_ijoin(tp, orphanage_ip, 0);
1190                         libxfs_trans_ijoin(tp, ino_p, 0);
1191
1192                         libxfs_defer_init(tp, &dfops, &first);
1193
1194                         err = -libxfs_dir_createname(tp, orphanage_ip, &xname,
1195                                                 ino, nres);
1196                         if (err)
1197                                 do_error(
1198         _("name create failed in %s (%d), filesystem may be out of space\n"),
1199                                         ORPHANAGE, err);
1200
1201                         if (irec)
1202                                 add_inode_ref(irec, ino_offset);
1203                         else
1204                                 inc_nlink(VFS_I(orphanage_ip));
1205                         libxfs_trans_log_inode(tp, orphanage_ip, XFS_ILOG_CORE);
1206
1207                         /*
1208                          * don't replace .. value if it already points
1209                          * to us.  that'll pop a libxfs/kernel ASSERT.
1210                          */
1211                         if (entry_ino_num != orphanage_ino)  {
1212                                 err = -libxfs_dir_replace(tp, ino_p,
1213                                                 &xfs_name_dotdot, orphanage_ino,
1214                                                 nres);
1215                                 if (err)
1216                                         do_error(
1217         _("name replace op failed (%d), filesystem may be out of space\n"),
1218                                                 err);
1219                         }
1220
1221                         libxfs_defer_ijoin(&dfops, ino_p);
1222                         err = -libxfs_defer_finish(&tp, &dfops);
1223                         if (err)
1224                                 do_error(
1225         _("bmap finish failed (%d), filesystem may be out of space\n"),
1226                                         err);
1227
1228                         libxfs_trans_commit(tp);
1229                 }
1230
1231         } else  {
1232                 /*
1233                  * use the remove log reservation as that's
1234                  * more accurate.  we're only creating the
1235                  * links, we're not doing the inode allocation
1236                  * also accounted for in the create
1237                  */
1238                 nres = XFS_DIRENTER_SPACE_RES(mp, xname.len);
1239                 err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove,
1240                                           nres, 0, 0, &tp);
1241                 if (err)
1242                         do_error(
1243         _("space reservation failed (%d), filesystem may be out of space\n"),
1244                                 err);
1245
1246                 libxfs_trans_ijoin(tp, orphanage_ip, 0);
1247                 libxfs_trans_ijoin(tp, ino_p, 0);
1248
1249                 libxfs_defer_init(tp, &dfops, &first);
1250                 err = -libxfs_dir_createname(tp, orphanage_ip, &xname, ino,
1251                                                 nres);
1252                 if (err)
1253                         do_error(
1254         _("name create failed in %s (%d), filesystem may be out of space\n"),
1255                                 ORPHANAGE, err);
1256                 ASSERT(err == 0);
1257
                     /* the orphanage entry is now the only link to the inode */
1258                 set_nlink(VFS_I(ino_p), 1);
1259                 libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE);
1260
1261                 libxfs_defer_ijoin(&dfops, ino_p);
1262                 err = -libxfs_defer_finish(&tp, &dfops);
1263                 if (err)
1264                         do_error(
1265         _("bmap finish failed (%d), filesystem may be out of space\n"),
1266                                 err);
1267
1268                 libxfs_trans_commit(tp);
1269         }
1270         IRELE(ino_p);
1271         IRELE(orphanage_ip);
1272 }
1273
1274 /*
      * Warn about a bad directory entry and say what will happen to it.
      *
      * @msg is used as the do_warn() format for @iname, @ino1 and @ino2.
      * The trailer printed after it depends on the no_modify and verbose
      * globals.  Returns 1 when the entry should be junked (repair is
      * allowed to modify), 0 in no-modify mode.
      */
     static int
1275 entry_junked(
1276         const char      *msg,
1277         const char      *iname,
1278         xfs_ino_t       ino1,
1279         xfs_ino_t       ino2)
1280 {
1281         do_warn(msg, iname, ino1, ino2);

             /* no-modify mode: only report what would have been done */
             if (no_modify) {
                     do_warn(_(", would junk entry\n"));
                     return 0;
             }

             if (verbose)
                     do_warn(_(", marking entry to be junked\n"));
             else
                     do_warn("\n");
             return 1;
1290 }
1291
1292 /*
      * Find and invalidate all the directory's buffers.
      *
      * Walks the extent list of @ip's data fork and, for every directory
      * block position covered by an extent, asks libxfs_da_get_buf() for
      * the buffer (in fork @whichfork) and invalidates it within @tp.
      * Positions for which no buffer comes back are skipped.  Inodes whose
      * data fork format is neither extents nor btree are left alone.
      *
      * Returns 0 on success, or the negated libxfs_da_get_buf() result on
      * the first failure.
      */
1293 static int
1294 dir_binval(
1295         struct xfs_trans        *tp,
1296         struct xfs_inode        *ip,
1297         int                     whichfork)
1298 {
             struct xfs_iext_cursor  cursor;
             struct xfs_bmbt_irec    got;
             struct xfs_ifork        *ifp;
             struct xfs_da_geometry  *geo;
             struct xfs_buf          *bp;
             xfs_dablk_t             blk;
             xfs_dablk_t             last_blk;
             int                     error;

             /* only extents and btree format forks have blocks to walk */
             switch (ip->i_d.di_format) {
             case XFS_DINODE_FMT_EXTENTS:
             case XFS_DINODE_FMT_BTREE:
                     break;
             default:
                     return 0;
             }

             geo = tp->t_mountp->m_dir_geo;
             ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
             for_each_xfs_iext(ifp, &cursor, &got) {
                     blk = xfs_dir2_db_to_da(geo,
                                     got.br_startoff + geo->fsbcount - 1);
                     last_blk = xfs_dir2_db_to_da(geo,
                                     got.br_startoff + got.br_blockcount);
                     while (blk <= last_blk) {
                             bp = NULL;
                             error = -libxfs_da_get_buf(tp, ip, blk, -2, &bp,
                                             whichfork);
                             if (error)
                                     return error;
                             if (bp) {
                                     libxfs_trans_binval(tp, bp);
                                     libxfs_trans_brelse(tp, bp);
                             }
                             blk += geo->fsbcount;
                     }
             }

             return 0;
1332 }
1333
1334 /*
1335  * Unexpected failure during the rebuild will leave the entries in
1336  * lost+found on the next run
      *
      * Throws away the contents of directory @ino (incore inode @ip) and
      * rebuilds it from the name/inode pairs in @hashtab.  A first
      * tr_remove transaction invalidates the directory's buffers, unmaps
      * its blocks and re-initializes it; each hash table entry is then
      * re-added in its own tr_create transaction.  @irec and @ino_offset
      * identify the directory's incore inode record and are used to find
      * its parent.
      *
      * Errors from the unmap, init, createname and defer-finish steps are
      * warned about and the current transaction is cancelled; reservation
      * failures go to res_failed().
1337  */
1338
1339 static void
1340 longform_dir2_rebuild(
1341         xfs_mount_t             *mp,
1342         xfs_ino_t               ino,
1343         xfs_inode_t             *ip,
1344         ino_tree_node_t         *irec,
1345         int                     ino_offset,
1346         dir_hash_tab_t          *hashtab)
1347 {
1348         int                     error;
1349         int                     nres;
1350         xfs_trans_t             *tp;
1351         xfs_fileoff_t           lastblock;
1352         xfs_fsblock_t           firstblock;
1353         struct xfs_defer_ops            dfops;
1354         xfs_inode_t             pip;
1355         dir_hash_ent_t          *p;
1356         int                     done;
1357
1358         /*
1359          * trash directory completely and rebuild from scratch using the
1360          * name/inode pairs in the hash table
1361          */
1362
1363         do_warn(_("rebuilding directory inode %" PRIu64 "\n"), ino);
1364
1365         /*
1366          * first attempt to locate the parent inode, if it can't be
1367          * found, set it to the root inode and it'll be moved to the
1368          * orphanage later (the inode number here needs to be valid
1369          * for the libxfs_dir_init() call).
              *
              * pip is a stack placeholder; only its i_ino is filled in here.
1370          */
1371         pip.i_ino = get_inode_parent(irec, ino_offset);
1372         if (pip.i_ino == NULLFSINO ||
1373             libxfs_dir_ino_validate(mp, pip.i_ino))
1374                 pip.i_ino = mp->m_sb.sb_rootino;
1375
             /*
              * NOTE(review): dfops is initialized before the transaction
              * exists, hence the NULL tp -- confirm libxfs_defer_init()
              * does not need the transaction here.
              */
1376         libxfs_defer_init(NULL, &dfops, &firstblock);
1377
1378         nres = XFS_REMOVE_SPACE_RES(mp);
1379         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, nres, 0, 0, &tp);
1380         if (error)
1381                 res_failed(error);
1382         libxfs_trans_ijoin(tp, ip, 0);
1383
1384         error = dir_binval(tp, ip, XFS_DATA_FORK);
1385         if (error)
1386                 res_failed(error);
1387
1388         if ((error = -libxfs_bmap_last_offset(ip, &lastblock, XFS_DATA_FORK)))
1389                 do_error(_("xfs_bmap_last_offset failed -- error - %d\n"),
1390                         error);
1391
1392         /* free all data, leaf, node and freespace blocks */
1393         error = -libxfs_bunmapi(tp, ip, 0, lastblock, XFS_BMAPI_METADATA, 0,
1394                                 &firstblock, &done);
1395         if (error) {
1396                 do_warn(_("xfs_bunmapi failed -- error - %d\n"), error);
1397                 goto out_bmap_cancel;
1398         }
1399
1400         ASSERT(done);
1401
1402         error = -libxfs_dir_init(tp, ip, &pip);
1403         if (error) {
1404                 do_warn(_("xfs_dir_init failed -- error - %d\n"), error);
1405                 goto out_bmap_cancel;
1406         }
1407
1408         libxfs_defer_ijoin(&dfops, ip);
1409         error = -libxfs_defer_finish(&tp, &dfops);
             if (error) {
                     /*
                      * don't commit a transaction whose deferred work
                      * failed; bail out the same way the re-add loop
                      * below does
                      */
                     do_warn(
             _("bmap finish failed (%d), filesystem may be out of space\n"),
                             error);
                     goto out_bmap_cancel;
             }
1410
1411         libxfs_trans_commit(tp);
1412
1413         if (ino == mp->m_sb.sb_rootino)
1414                 need_root_dotdot = 0;
1415
1416         /* go through the hash list and re-add the inodes */
1417
1418         for (p = hashtab->first; p; p = p->nextbyorder) {
1419
                     /* skip names starting with '/' as well as "." and ".." */
1420                 if (p->name.name[0] == '/' || (p->name.name[0] == '.' &&
1421                                 (p->name.len == 1 || (p->name.len == 2 &&
1422                                                 p->name.name[1] == '.'))))
1423                         continue;
1424
1425                 nres = XFS_CREATE_SPACE_RES(mp, p->name.len);
1426                 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_create,
1427                                             nres, 0, 0, &tp);
1428                 if (error)
1429                         res_failed(error);
1430
1431                 libxfs_trans_ijoin(tp, ip, 0);
1432
1433                 libxfs_defer_init(tp, &dfops, &firstblock);
1434                 error = -libxfs_dir_createname(tp, ip, &p->name, p->inum,
1435                                                 nres);
1436                 if (error) {
1437                         do_warn(
1438 _("name create failed in ino %" PRIu64 " (%d), filesystem may be out of space\n"),
1439                                 ino, error);
1440                         goto out_bmap_cancel;
1441                 }
1442
1443                 libxfs_defer_ijoin(&dfops, ip);
1444                 error = -libxfs_defer_finish(&tp, &dfops);
1445                 if (error) {
1446                         do_warn(
1447         _("bmap finish failed (%d), filesystem may be out of space\n"),
1448                                 error);
1449                         goto out_bmap_cancel;
1450                 }
1451
1452                 libxfs_trans_commit(tp);
1453         }
1454
1455         return;
1456
1457 out_bmap_cancel:
             /* drop pending deferred work, then the transaction itself */
1458         libxfs_defer_cancel(&dfops);
1459         libxfs_trans_cancel(tp);
1460         return;
1461 }
1462
1463
1464 /*
1465  * Kill a block in a version 2 inode.
1466  * Makes its own transaction.
      *
      * Removes directory block @da_bno, whose buffer is @bp, from inode
      * @ip.  Blocks at or above the geometry's leafblk and below its
      * freeblk go through libxfs_da_shrink_inode(); every other block is
      * converted to a directory block number and handed to
      * libxfs_dir2_shrink_inode().  A failed shrink or a failure to
      * finish the deferred operations is fatal (do_error).
1467  */
1468 static void
1469 dir2_kill_block(
1470         xfs_mount_t     *mp,
1471         xfs_inode_t     *ip,
1472         xfs_dablk_t     da_bno,
1473         struct xfs_buf  *bp)
1474 {
1475         xfs_da_args_t   args;
1476         int             error;
1477         xfs_fsblock_t   firstblock;
1478         struct xfs_defer_ops    dfops;
1479         int             nres;
1480         xfs_trans_t     *tp;
1481
1482         nres = XFS_REMOVE_SPACE_RES(mp);
1483         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, nres, 0, 0, &tp);
1484         if (error)
1485                 res_failed(error);
             /* attach both the inode and the caller's buffer to the transaction */
1486         libxfs_trans_ijoin(tp, ip, 0);
1487         libxfs_trans_bjoin(tp, bp);
1488         memset(&args, 0, sizeof(args));
1489         libxfs_defer_init(tp, &dfops, &firstblock);
1490         args.dp = ip;
1491         args.trans = tp;
1492         args.firstblock = &firstblock;
1493         args.whichfork = XFS_DATA_FORK;
1494         args.geo = mp->m_dir_geo;
1495         if (da_bno >= mp->m_dir_geo->leafblk && da_bno < mp->m_dir_geo->freeblk)
1496                 error = -libxfs_da_shrink_inode(&args, da_bno, bp);
1497         else
1498                 error = -libxfs_dir2_shrink_inode(&args,
1499                                 xfs_dir2_da_to_db(mp->m_dir_geo, da_bno), bp);
1500         if (error)
1501                 do_error(_("shrink_inode failed inode %" PRIu64 " block %u\n"),
1502                         ip->i_ino, da_bno);
1503         libxfs_defer_ijoin(&dfops, ip);
             /* don't commit if the deferred work could not be finished */
1504         error = -libxfs_defer_finish(&tp, &dfops);
             if (error)
                     do_error(
             _("bmap finish failed (%d), filesystem may be out of space\n"),
                             error);
1505         libxfs_trans_commit(tp);
1506 }
1507
1508 /*
1509  * process a data block, also checks for .. entry
1510  * and corrects it to match what we think .. should be
1511  */
1512 static void
1513 longform_dir2_entry_check_data(
1514         xfs_mount_t             *mp,
1515         xfs_inode_t             *ip,
1516         int                     *num_illegal,
1517         int                     *need_dot,
1518         ino_tree_node_t         *current_irec,
1519         int                     current_ino_offset,
1520         struct xfs_buf          **bpp,
1521         dir_hash_tab_t          *hashtab,
1522         freetab_t               **freetabp,
1523         xfs_dablk_t             da_bno,
1524         int                     isblock)
1525 {
1526         xfs_dir2_dataptr_t      addr;
1527         xfs_dir2_leaf_entry_t   *blp;
1528         struct xfs_buf          *bp;
1529         xfs_dir2_block_tail_t   *btp;
1530         struct xfs_dir2_data_hdr *d;
1531         xfs_dir2_db_t           db;
1532         xfs_dir2_data_entry_t   *dep;
1533         xfs_dir2_data_unused_t  *dup;
1534         struct xfs_dir2_data_free *bf;
1535         char                    *endptr;
1536         int                     error;
1537         xfs_fsblock_t           firstblock;
1538         struct xfs_defer_ops            dfops;
1539         char                    fname[MAXNAMELEN + 1];
1540         freetab_t               *freetab;
1541         int                     i;
1542         int                     ino_offset;
1543         xfs_ino_t               inum;
1544         ino_tree_node_t         *irec;
1545         int                     junkit;
1546         int                     lastfree;
1547         int                     len;
1548         int                     nbad;
1549         int                     needlog;
1550         int                     needscan;
1551         xfs_ino_t               parent;
1552         char                    *ptr;
1553         xfs_trans_t             *tp;
1554         int                     wantmagic;
1555         struct xfs_da_args      da = {
1556                 .dp = ip,
1557                 .geo = mp->m_dir_geo,
1558         };
1559
1560
1561         bp = *bpp;
1562         d = bp->b_addr;
1563         ptr = (char *)M_DIROPS(mp)->data_entry_p(d);
1564         nbad = 0;
1565         needscan = needlog = 0;
1566         junkit = 0;
1567         freetab = *freetabp;
1568         if (isblock) {
1569                 btp = xfs_dir2_block_tail_p(mp->m_dir_geo, d);
1570                 blp = xfs_dir2_block_leaf_p(btp);
1571                 endptr = (char *)blp;
1572                 if (endptr > (char *)btp)
1573                         endptr = (char *)btp;
1574                 if (xfs_sb_version_hascrc(&mp->m_sb))
1575                         wantmagic = XFS_DIR3_BLOCK_MAGIC;
1576                 else
1577                         wantmagic = XFS_DIR2_BLOCK_MAGIC;
1578         } else {
1579                 endptr = (char *)d + mp->m_dir_geo->blksize;
1580                 if (xfs_sb_version_hascrc(&mp->m_sb))
1581                         wantmagic = XFS_DIR3_DATA_MAGIC;
1582                 else
1583                         wantmagic = XFS_DIR2_DATA_MAGIC;
1584         }
1585         db = xfs_dir2_da_to_db(mp->m_dir_geo, da_bno);
1586
1587         /* check for data block beyond expected end */
1588         if (freetab->naents <= db) {
1589                 struct freetab_ent e;
1590
1591                 *freetabp = freetab = realloc(freetab, FREETAB_SIZE(db + 1));
1592                 if (!freetab) {
1593                         do_error(_("realloc failed in %s (%zu bytes)\n"),
1594                                 __func__, FREETAB_SIZE(db + 1));
1595                 }
1596                 e.v = NULLDATAOFF;
1597                 e.s = 0;
1598                 for (i = freetab->naents; i < db; i++)
1599                         freetab->ents[i] = e;
1600                 freetab->naents = db + 1;
1601         }
1602
1603         /* check the data block */
1604         while (ptr < endptr) {
1605
1606                 /* check for freespace */
1607                 dup = (xfs_dir2_data_unused_t *)ptr;
1608                 if (XFS_DIR2_DATA_FREE_TAG == be16_to_cpu(dup->freetag)) {
1609
1610                         /* check for invalid freespace length */
1611                         if (ptr + be16_to_cpu(dup->length) > endptr ||
1612                                         be16_to_cpu(dup->length) == 0 ||
1613                                         (be16_to_cpu(dup->length) &
1614                                                 (XFS_DIR2_DATA_ALIGN - 1)))
1615                                 break;
1616
1617                         /* check for invalid tag */
1618                         if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) !=
1619                                                 (char *)dup - (char *)d)
1620                                 break;
1621
1622                         /* check for block with no data entries */
1623                         if ((ptr == (char *)M_DIROPS(mp)->data_entry_p(d)) &&
1624                             (ptr + be16_to_cpu(dup->length) >= endptr)) {
1625                                 junkit = 1;
1626                                 *num_illegal += 1;
1627                                 break;
1628                         }
1629
1630                         /* continue at the end of the freespace */
1631                         ptr += be16_to_cpu(dup->length);
1632                         if (ptr >= endptr)
1633                                 break;
1634                 }
1635
1636                 /* validate data entry size */
1637                 dep = (xfs_dir2_data_entry_t *)ptr;
1638                 if (ptr + M_DIROPS(mp)->data_entsize(dep->namelen) > endptr)
1639                         break;
1640                 if (be16_to_cpu(*M_DIROPS(mp)->data_entry_tag_p(dep)) !=
1641                                                 (char *)dep - (char *)d)
1642                         break;
1643                 ptr += M_DIROPS(mp)->data_entsize(dep->namelen);
1644         }
1645
1646         /* did we find an empty or corrupt block? */
1647         if (ptr != endptr) {
1648                 if (junkit) {
1649                         do_warn(
1650         _("empty data block %u in directory inode %" PRIu64 ": "),
1651                                 da_bno, ip->i_ino);
1652                 } else {
1653                         do_warn(_
1654         ("corrupt block %u in directory inode %" PRIu64 ": "),
1655                                 da_bno, ip->i_ino);
1656                 }
1657                 if (!no_modify) {
1658                         do_warn(_("junking block\n"));
1659                         dir2_kill_block(mp, ip, da_bno, bp);
1660                 } else {
1661                         do_warn(_("would junk block\n"));
1662                         libxfs_putbuf(bp);
1663                 }
1664                 freetab->ents[db].v = NULLDATAOFF;
1665                 *bpp = NULL;
1666                 return;
1667         }
1668
1669         /* update number of data blocks processed */
1670         if (freetab->nents < db + 1)
1671                 freetab->nents = db + 1;
1672
1673         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, 0, 0, 0, &tp);
1674         if (error)
1675                 res_failed(error);
1676         da.trans = tp;
1677         libxfs_trans_ijoin(tp, ip, 0);
1678         libxfs_trans_bjoin(tp, bp);
1679         libxfs_trans_bhold(tp, bp);
1680         libxfs_defer_init(tp, &dfops, &firstblock);
1681         if (be32_to_cpu(d->magic) != wantmagic) {
1682                 do_warn(
1683         _("bad directory block magic # %#x for directory inode %" PRIu64 " block %d: "),
1684                         be32_to_cpu(d->magic), ip->i_ino, da_bno);
1685                 if (!no_modify) {
1686                         do_warn(_("fixing magic # to %#x\n"), wantmagic);
1687                         d->magic = cpu_to_be32(wantmagic);
1688                         needlog = 1;
1689                 } else
1690                         do_warn(_("would fix magic # to %#x\n"), wantmagic);
1691         }
1692         lastfree = 0;
1693         ptr = (char *)M_DIROPS(mp)->data_entry_p(d);
1694         /*
1695          * look at each entry.  reference inode pointed to by each
1696          * entry in the incore inode tree.
1697          * if not a directory, set reached flag, increment link count
1698          * if a directory and reached, mark entry as to be deleted.
1699          * if a directory, check to see if recorded parent
1700          *      matches current inode #,
1701          *      if so, then set reached flag, increment link count
1702          *              of current and child dir inodes, push the child
1703          *              directory inode onto the directory stack.
1704          *      if current inode != parent, then mark entry to be deleted.
1705          */
1706         while (ptr < endptr) {
1707                 dup = (xfs_dir2_data_unused_t *)ptr;
1708                 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
1709                         if (lastfree) {
1710                                 do_warn(
1711         _("directory inode %" PRIu64 " block %u has consecutive free entries: "),
1712                                         ip->i_ino, da_bno);
1713                                 if (!no_modify) {
1714
1715                                         do_warn(_("joining together\n"));
1716                                         len = be16_to_cpu(dup->length);
1717                                         libxfs_dir2_data_use_free(&da, bp, dup,
1718                                                 ptr - (char *)d, len, &needlog,
1719                                                 &needscan);
1720                                         libxfs_dir2_data_make_free(&da, bp,
1721                                                 ptr - (char *)d, len, &needlog,
1722                                                 &needscan);
1723                                 } else
1724                                         do_warn(_("would join together\n"));
1725                         }
1726                         ptr += be16_to_cpu(dup->length);
1727                         lastfree = 1;
1728                         continue;
1729                 }
1730                 addr = xfs_dir2_db_off_to_dataptr(mp->m_dir_geo, db,
1731                                                   ptr - (char *)d);
1732                 dep = (xfs_dir2_data_entry_t *)ptr;
1733                 ptr += M_DIROPS(mp)->data_entsize(dep->namelen);
1734                 inum = be64_to_cpu(dep->inumber);
1735                 lastfree = 0;
1736                 /*
1737                  * skip bogus entries (leading '/').  they'll be deleted
1738                  * later.  must still log it, else we leak references to
1739                  * buffers.
1740                  */
1741                 if (dep->name[0] == '/')  {
1742                         nbad++;
1743                         if (!no_modify)
1744                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1745                         continue;
1746                 }
1747
1748                 memmove(fname, dep->name, dep->namelen);
1749                 fname[dep->namelen] = '\0';
1750                 ASSERT(inum != NULLFSINO);
1751
1752                 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, inum),
1753                                         XFS_INO_TO_AGINO(mp, inum));
1754                 if (irec == NULL)  {
1755                         nbad++;
1756                         if (entry_junked(
1757         _("entry \"%s\" in directory inode %" PRIu64 " points to non-existent inode %" PRIu64 ""),
1758                                         fname, ip->i_ino, inum)) {
1759                                 dep->name[0] = '/';
1760                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1761                         }
1762                         continue;
1763                 }
1764                 ino_offset = XFS_INO_TO_AGINO(mp, inum) - irec->ino_startnum;
1765
1766                 /*
1767                  * if it's a free inode, blow out the entry.
1768                  * by now, any inode that we think is free
1769                  * really is free.
1770                  */
1771                 if (is_inode_free(irec, ino_offset))  {
1772                         nbad++;
1773                         if (entry_junked(
1774         _("entry \"%s\" in directory inode %" PRIu64 " points to free inode %" PRIu64),
1775                                         fname, ip->i_ino, inum)) {
1776                                 dep->name[0] = '/';
1777                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1778                         }
1779                         continue;
1780                 }
1781
1782                 /*
1783                  * check if this inode is lost+found dir in the root
1784                  */
1785                 if (inum == mp->m_sb.sb_rootino && strcmp(fname, ORPHANAGE) == 0) {
1786                         /*
1787                          * if it's not a directory, trash it
1788                          */
1789                         if (!inode_isadir(irec, ino_offset)) {
1790                                 nbad++;
1791                                 if (entry_junked(
1792         _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"),
1793                                                 ORPHANAGE, inum, ip->i_ino)) {
1794                                         dep->name[0] = '/';
1795                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1796                                 }
1797                                 continue;
1798                         }
1799                         /*
1800                          * if this is a dup, it will be picked up below,
1801                          * otherwise, mark it as the orphanage for later.
1802                          */
1803                         if (!orphanage_ino)
1804                                 orphanage_ino = inum;
1805                 }
1806
1807                 /*
1808                  * check for duplicate names in directory.
1809                  */
1810                 if (!dir_hash_add(mp, hashtab, addr, inum, dep->namelen,
1811                                 dep->name, M_DIROPS(mp)->data_get_ftype(dep))) {
1812                         nbad++;
1813                         if (entry_junked(
1814         _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"),
1815                                         fname, inum, ip->i_ino)) {
1816                                 dep->name[0] = '/';
1817                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1818                         }
1819                         if (inum == orphanage_ino)
1820                                 orphanage_ino = 0;
1821                         continue;
1822                 }
1823
1824                 /*
1825                  * if just scanning to rebuild a directory due to a ".."
1826                  * update, just continue
1827                  */
1828                 if (dotdot_update)
1829                         continue;
1830
1831                 /*
1832                  * skip the '..' entry since it's checked when the
1833                  * directory is reached by something else.  if it never
1834                  * gets reached, it'll be moved to the orphanage and we'll
1835                  * take care of it then. If it doesn't exist at all, the
1836                  * directory needs to be rebuilt first before being added
1837                  * to the orphanage.
1838                  */
1839                 if (dep->namelen == 2 && dep->name[0] == '.' &&
1840                                 dep->name[1] == '.') {
1841                         if (da_bno != 0) {
1842                                 /* ".." should be in the first block */
1843                                 nbad++;
1844                                 if (entry_junked(
1845         _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is not in the the first block"), fname,
1846                                                 inum, ip->i_ino)) {
1847                                         dep->name[0] = '/';
1848                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1849                                 }
1850                         }
1851                         continue;
1852                 }
1853                 ASSERT(no_modify || !verify_inum(mp, inum));
1854                 /*
1855                  * special case the . entry.  we know there's only one
1856                  * '.' and only '.' points to itself because bogus entries
1857                  * got trashed in phase 3 if there were > 1.
1858                  * bump up link count for '.' but don't set reached
1859                  * until we're actually reached by another directory
1860                  * '..' is already accounted for or will be taken care
1861                  * of when directory is moved to orphanage.
1862                  */
1863                 if (ip->i_ino == inum)  {
1864                         ASSERT(no_modify ||
1865                                (dep->name[0] == '.' && dep->namelen == 1));
1866                         add_inode_ref(current_irec, current_ino_offset);
1867                         if (da_bno != 0 ||
1868                             dep != M_DIROPS(mp)->data_entry_p(d)) {
1869                                 /* "." should be the first entry */
1870                                 nbad++;
1871                                 if (entry_junked(
1872         _("entry \"%s\" in dir %" PRIu64 " is not the first entry"),
1873                                                 fname, inum, ip->i_ino)) {
1874                                         dep->name[0] = '/';
1875                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1876                                 }
1877                         }
1878                         *need_dot = 0;
1879                         continue;
1880                 }
1881                 /*
1882                  * skip entries with bogus inumbers if we're in no modify mode
1883                  */
1884                 if (no_modify && verify_inum(mp, inum))
1885                         continue;
1886
1887                 /* validate ftype field if supported */
1888                 if (xfs_sb_version_hasftype(&mp->m_sb)) {
1889                         uint8_t dir_ftype;
1890                         uint8_t ino_ftype;
1891
1892                         dir_ftype = M_DIROPS(mp)->data_get_ftype(dep);
1893                         ino_ftype = get_inode_ftype(irec, ino_offset);
1894
1895                         if (dir_ftype != ino_ftype) {
1896                                 if (no_modify) {
1897                                         do_warn(
1898         _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
1899                                                 dir_ftype, ino_ftype,
1900                                                 ip->i_ino, inum);
1901                                 } else {
1902                                         do_warn(
1903         _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
1904                                                 dir_ftype, ino_ftype,
1905                                                 ip->i_ino, inum);
1906                                         M_DIROPS(mp)->data_put_ftype(dep,
1907                                                                 ino_ftype);
1908                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1909                                         dir_hash_update_ftype(hashtab, addr,
1910                                                               ino_ftype);
1911                                 }
1912                         }
1913                 }
1914
1915                 /*
1916                  * check easy case first, regular inode, just bump
1917                  * the link count and continue
1918                  */
1919                 if (!inode_isadir(irec, ino_offset))  {
1920                         add_inode_reached(irec, ino_offset);
1921                         continue;
1922                 }
1923                 parent = get_inode_parent(irec, ino_offset);
1924                 ASSERT(parent != 0);
1925                 junkit = 0;
1926                 /*
1927                  * bump up the link counts in parent and child
1928                  * directory but if the link doesn't agree with
1929                  * the .. in the child, blow out the entry.
1930                  * if the directory has already been reached,
1931                  * blow away the entry also.
1932                  */
1933                 if (is_inode_reached(irec, ino_offset))  {
1934                         junkit = 1;
1935                         do_warn(
1936 _("entry \"%s\" in dir %" PRIu64" points to an already connected directory inode %" PRIu64 "\n"),
1937                                 fname, ip->i_ino, inum);
1938                 } else if (parent == ip->i_ino)  {
1939                         add_inode_reached(irec, ino_offset);
1940                         add_inode_ref(current_irec, current_ino_offset);
1941                 } else if (parent == NULLFSINO) {
1942                         /* ".." was missing, but this entry refers to it,
1943                            so, set it as the parent and mark for rebuild */
1944                         do_warn(
1945         _("entry \"%s\" in dir ino %" PRIu64 " doesn't have a .. entry, will set it in ino %" PRIu64 ".\n"),
1946                                 fname, ip->i_ino, inum);
1947                         set_inode_parent(irec, ino_offset, ip->i_ino);
1948                         add_inode_reached(irec, ino_offset);
1949                         add_inode_ref(current_irec, current_ino_offset);
1950                         add_dotdot_update(XFS_INO_TO_AGNO(mp, inum), irec,
1951                                                                 ino_offset);
1952                 } else  {
1953                         junkit = 1;
1954                         do_warn(
1955 _("entry \"%s\" in dir inode %" PRIu64 " inconsistent with .. value (%" PRIu64 ") in ino %" PRIu64 "\n"),
1956                                 fname, ip->i_ino, parent, inum);
1957                 }
1958                 if (junkit)  {
1959                         if (inum == orphanage_ino)
1960                                 orphanage_ino = 0;
1961                         nbad++;
1962                         if (!no_modify)  {
1963                                 dep->name[0] = '/';
1964                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1965                                 if (verbose)
1966                                         do_warn(
1967                                         _("\twill clear entry \"%s\"\n"),
1968                                                 fname);
1969                         } else  {
1970                                 do_warn(_("\twould clear entry \"%s\"\n"),
1971                                         fname);
1972                         }
1973                 }
1974         }
1975         *num_illegal += nbad;
1976         if (needscan)
1977                 libxfs_dir2_data_freescan_int(mp->m_dir_geo, M_DIROPS(mp),
1978                                 d, &i);
1979         if (needlog)
1980                 libxfs_dir2_data_log_header(&da, bp);
1981         libxfs_defer_ijoin(&dfops, ip);
1982         libxfs_defer_finish(&tp, &dfops);
1983         libxfs_trans_commit(tp);
1984
1985         /* record the largest free space in the freetab for later checking */
1986         bf = M_DIROPS(mp)->data_bestfree_p(d);
1987         freetab->ents[db].v = be16_to_cpu(bf[0].length);
1988         freetab->ents[db].s = 0;
1989 }
1990
1991 /* check v5 metadata */
1992 static int
1993 __check_dir3_header(
1994         struct xfs_mount        *mp,
1995         struct xfs_buf          *bp,
1996         xfs_ino_t               ino,
1997         __be64                  owner,
1998         __be64                  blkno,
1999         uuid_t                  *uuid)
2000 {
2001
2002         /* verify owner */
2003         if (be64_to_cpu(owner) != ino) {
2004                 do_warn(
2005 _("expected owner inode %" PRIu64 ", got %llu, directory block %" PRIu64 "\n"),
2006                         ino, (unsigned long long)be64_to_cpu(owner), bp->b_bn);
2007                 return 1;
2008         }
2009         /* verify block number */
2010         if (be64_to_cpu(blkno) != bp->b_bn) {
2011                 do_warn(
2012 _("expected block %" PRIu64 ", got %llu, directory inode %" PRIu64 "\n"),
2013                         bp->b_bn, (unsigned long long)be64_to_cpu(blkno), ino);
2014                 return 1;
2015         }
2016         /* verify uuid */
2017         if (platform_uuid_compare(uuid, &mp->m_sb.sb_meta_uuid) != 0) {
2018                 do_warn(
2019 _("wrong FS UUID, directory inode %" PRIu64 " block %" PRIu64 "\n"),
2020                         ino, bp->b_bn);
2021                 return 1;
2022         }
2023
2024         return 0;
2025 }
2026
2027 static int
2028 check_da3_header(
2029         struct xfs_mount        *mp,
2030         struct xfs_buf          *bp,
2031         xfs_ino_t               ino)
2032 {
2033         struct xfs_da3_blkinfo  *info = bp->b_addr;
2034
2035         return __check_dir3_header(mp, bp, ino, info->owner, info->blkno,
2036                                    &info->uuid);
2037 }
2038
2039 static int
2040 check_dir3_header(
2041         struct xfs_mount        *mp,
2042         struct xfs_buf          *bp,
2043         xfs_ino_t               ino)
2044 {
2045         struct xfs_dir3_blk_hdr *info = bp->b_addr;
2046
2047         return __check_dir3_header(mp, bp, ino, info->owner, info->blkno,
2048                                    &info->uuid);
2049 }
2050
2051 /*
2052  * Check contents of leaf-form block.
2053  */
2054 static int
2055 longform_dir2_check_leaf(
2056         xfs_mount_t             *mp,
2057         xfs_inode_t             *ip,
2058         dir_hash_tab_t          *hashtab,
2059         freetab_t               *freetab)
2060 {
2061         int                     badtail;
2062         __be16                  *bestsp;
2063         struct xfs_buf          *bp;
2064         xfs_dablk_t             da_bno;
2065         int                     i;
2066         xfs_dir2_leaf_t         *leaf;
2067         xfs_dir2_leaf_tail_t    *ltp;
2068         int                     seeval;
2069         struct xfs_dir2_leaf_entry *ents;
2070         struct xfs_dir3_icleaf_hdr leafhdr;
2071         int                     error;
2072         int                     fixit = 0;
2073
2074         da_bno = mp->m_dir_geo->leafblk;
2075         error = dir_read_buf(ip, da_bno, -1, &bp, &xfs_dir3_leaf1_buf_ops,
2076                              &fixit);
2077         if (error == EFSBADCRC || error == EFSCORRUPTED || fixit) {
2078                 do_warn(
2079         _("leaf block %u for directory inode %" PRIu64 " bad CRC\n"),
2080                         da_bno, ip->i_ino);
2081                 return 1;
2082         } else if (error) {
2083                 do_error(
2084         _("can't read block %u for directory inode %" PRIu64 ", error %d\n"),
2085                         da_bno, ip->i_ino, error);
2086                 /* NOTREACHED */
2087         }
2088
2089         leaf = bp->b_addr;
2090         M_DIROPS(mp)->leaf_hdr_from_disk(&leafhdr, leaf);
2091         ents = M_DIROPS(mp)->leaf_ents_p(leaf);
2092         ltp = xfs_dir2_leaf_tail_p(mp->m_dir_geo, leaf);
2093         bestsp = xfs_dir2_leaf_bests_p(ltp);
2094         if (!(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC ||
2095               leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) ||
2096                                 leafhdr.forw || leafhdr.back ||
2097                                 leafhdr.count < leafhdr.stale ||
2098                                 leafhdr.count >
2099                                         M_DIROPS(mp)->leaf_max_ents(mp->m_dir_geo) ||
2100                                 (char *)&ents[leafhdr.count] > (char *)bestsp) {
2101                 do_warn(
2102         _("leaf block %u for directory inode %" PRIu64 " bad header\n"),
2103                         da_bno, ip->i_ino);
2104                 libxfs_putbuf(bp);
2105                 return 1;
2106         }
2107
2108         if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) {
2109                 error = check_da3_header(mp, bp, ip->i_ino);
2110                 if (error) {
2111                         libxfs_putbuf(bp);
2112                         return error;
2113                 }
2114         }
2115
2116         seeval = dir_hash_see_all(hashtab, ents, leafhdr.count, leafhdr.stale);
2117         if (dir_hash_check(hashtab, ip, seeval)) {
2118                 libxfs_putbuf(bp);
2119                 return 1;
2120         }
2121         badtail = freetab->nents != be32_to_cpu(ltp->bestcount);
2122         for (i = 0; !badtail && i < be32_to_cpu(ltp->bestcount); i++) {
2123                 freetab->ents[i].s = 1;
2124                 badtail = freetab->ents[i].v != be16_to_cpu(bestsp[i]);
2125         }
2126         if (badtail) {
2127                 do_warn(
2128         _("leaf block %u for directory inode %" PRIu64 " bad tail\n"),
2129                         da_bno, ip->i_ino);
2130                 libxfs_putbuf(bp);
2131                 return 1;
2132         }
2133         libxfs_putbuf(bp);
2134         return fixit;
2135 }
2136
2137 /*
2138  * Check contents of the node blocks (leaves)
2139  * Looks for matching hash values for the data entries.
2140  */
2141 static int
2142 longform_dir2_check_node(
2143         xfs_mount_t             *mp,
2144         xfs_inode_t             *ip,
2145         dir_hash_tab_t          *hashtab,
2146         freetab_t               *freetab)
2147 {
2148         struct xfs_buf          *bp;
2149         xfs_dablk_t             da_bno;
2150         xfs_dir2_db_t           fdb;
2151         xfs_dir2_free_t         *free;
2152         int                     i;
2153         xfs_dir2_leaf_t         *leaf;
2154         xfs_fileoff_t           next_da_bno;
2155         int                     seeval = 0;
2156         int                     used;
2157         struct xfs_dir2_leaf_entry *ents;
2158         struct xfs_dir3_icleaf_hdr leafhdr;
2159         struct xfs_dir3_icfree_hdr freehdr;
2160         __be16                  *bests;
2161         int                     error;
2162         int                     fixit = 0;
2163
2164         for (da_bno = mp->m_dir_geo->leafblk, next_da_bno = 0;
2165                         next_da_bno != NULLFILEOFF && da_bno < mp->m_dir_geo->freeblk;
2166                         da_bno = (xfs_dablk_t)next_da_bno) {
2167                 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2168                 if (bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK))
2169                         break;
2170
2171                 /*
2172                  * we need to use the da3 node verifier here as it handles the
2173                  * fact that reading the leaf hash tree blocks can return either
2174                  * leaf or node blocks and calls the correct verifier. If we get
2175                  * a node block, then we'll skip it below based on a magic
2176                  * number check.
2177                  */
2178                 error = dir_read_buf(ip, da_bno, -1, &bp,
2179                                      &xfs_da3_node_buf_ops, &fixit);
2180                 if (error) {
2181                         do_warn(
2182         _("can't read leaf block %u for directory inode %" PRIu64 ", error %d\n"),
2183                                 da_bno, ip->i_ino, error);
2184                         return 1;
2185                 }
2186                 leaf = bp->b_addr;
2187                 M_DIROPS(mp)->leaf_hdr_from_disk(&leafhdr, leaf);
2188                 ents = M_DIROPS(mp)->leaf_ents_p(leaf);
2189                 if (!(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
2190                       leafhdr.magic == XFS_DIR3_LEAFN_MAGIC ||
2191                       leafhdr.magic == XFS_DA_NODE_MAGIC ||
2192                       leafhdr.magic == XFS_DA3_NODE_MAGIC)) {
2193                         do_warn(
2194         _("unknown magic number %#x for block %u in directory inode %" PRIu64 "\n"),
2195                                 leafhdr.magic, da_bno, ip->i_ino);
2196                         libxfs_putbuf(bp);
2197                         return 1;
2198                 }
2199
2200                 /* check v5 metadata */
2201                 if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC ||
2202                     leafhdr.magic == XFS_DA3_NODE_MAGIC) {
2203                         error = check_da3_header(mp, bp, ip->i_ino);
2204                         if (error) {
2205                                 libxfs_putbuf(bp);
2206                                 return error;
2207                         }
2208                 }
2209
2210                 /* ignore nodes */
2211                 if (leafhdr.magic == XFS_DA_NODE_MAGIC ||
2212                     leafhdr.magic == XFS_DA3_NODE_MAGIC) {
2213                         libxfs_putbuf(bp);
2214                         continue;
2215                 }
2216
2217                 /*
2218                  * If there's a validator error, we need to ensure that we got
2219                  * the right ops on the buffer for when we write it back out.
2220                  */
2221                 bp->b_ops = &xfs_dir3_leafn_buf_ops;
2222                 if (leafhdr.count > M_DIROPS(mp)->leaf_max_ents(mp->m_dir_geo) ||
2223                     leafhdr.count < leafhdr.stale) {
2224                         do_warn(
2225         _("leaf block %u for directory inode %" PRIu64 " bad header\n"),
2226                                 da_bno, ip->i_ino);
2227                         libxfs_putbuf(bp);
2228                         return 1;
2229                 }
2230                 seeval = dir_hash_see_all(hashtab, ents,
2231                                         leafhdr.count, leafhdr.stale);
2232                 libxfs_putbuf(bp);
2233                 if (seeval != DIR_HASH_CK_OK)
2234                         return 1;
2235         }
2236         if (dir_hash_check(hashtab, ip, seeval))
2237                 return 1;
2238
2239         for (da_bno = mp->m_dir_geo->freeblk, next_da_bno = 0;
2240              next_da_bno != NULLFILEOFF;
2241              da_bno = (xfs_dablk_t)next_da_bno) {
2242                 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2243                 if (bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK))
2244                         break;
2245
2246                 error = dir_read_buf(ip, da_bno, -1, &bp,
2247                                      &xfs_dir3_free_buf_ops, &fixit);
2248                 if (error) {
2249                         do_warn(
2250         _("can't read freespace block %u for directory inode %" PRIu64 ", error %d\n"),
2251                                 da_bno, ip->i_ino, error);
2252                         return 1;
2253                 }
2254                 free = bp->b_addr;
2255                 M_DIROPS(mp)->free_hdr_from_disk(&freehdr, free);
2256                 bests = M_DIROPS(mp)->free_bests_p(free);
2257                 fdb = xfs_dir2_da_to_db(mp->m_dir_geo, da_bno);
2258                 if (!(freehdr.magic == XFS_DIR2_FREE_MAGIC ||
2259                       freehdr.magic == XFS_DIR3_FREE_MAGIC) ||
2260                     freehdr.firstdb !=
2261                         (fdb - xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) *
2262                         M_DIROPS(mp)->free_max_bests(mp->m_dir_geo) ||
2263                     freehdr.nvalid < freehdr.nused) {
2264                         do_warn(
2265         _("free block %u for directory inode %" PRIu64 " bad header\n"),
2266                                 da_bno, ip->i_ino);
2267                         libxfs_putbuf(bp);
2268                         return 1;
2269                 }
2270
2271                 if (freehdr.magic == XFS_DIR3_FREE_MAGIC) {
2272                         error = check_dir3_header(mp, bp, ip->i_ino);
2273                         if (error) {
2274                                 libxfs_putbuf(bp);
2275                                 return error;
2276                         }
2277                 }
2278                 for (i = used = 0; i < freehdr.nvalid; i++) {
2279                         if (i + freehdr.firstdb >= freetab->nents ||
2280                                         freetab->ents[i + freehdr.firstdb].v !=
2281                                                 be16_to_cpu(bests[i])) {
2282                                 do_warn(
2283         _("free block %u entry %i for directory ino %" PRIu64 " bad\n"),
2284                                         da_bno, i, ip->i_ino);
2285                                 libxfs_putbuf(bp);
2286                                 return 1;
2287                         }
2288                         used += be16_to_cpu(bests[i]) != NULLDATAOFF;
2289                         freetab->ents[i + freehdr.firstdb].s = 1;
2290                 }
2291                 if (used != freehdr.nused) {
2292                         do_warn(
2293         _("free block %u for directory inode %" PRIu64 " bad nused\n"),
2294                                 da_bno, ip->i_ino);
2295                         libxfs_putbuf(bp);
2296                         return 1;
2297                 }
2298                 libxfs_putbuf(bp);
2299         }
2300         for (i = 0; i < freetab->nents; i++) {
2301                 if ((freetab->ents[i].s == 0) &&
2302                     (freetab->ents[i].v != NULLDATAOFF)) {
2303                         do_warn(
2304         _("missing freetab entry %u for directory inode %" PRIu64 "\n"),
2305                                 i, ip->i_ino);
2306                         return 1;
2307                 }
2308         }
2309         return fixit;
2310 }
2311
2312 /*
2313  * If a directory is corrupt, we need to read in as many entries as possible,
2314  * destroy the entry and create a new one with recovered name/inode pairs.
2315  * (ie. get libxfs to do all the grunt work)
2316  */
2317 static void
2318 longform_dir2_entry_check(xfs_mount_t   *mp,
2319                         xfs_ino_t       ino,
2320                         xfs_inode_t     *ip,
2321                         int             *num_illegal,
2322                         int             *need_dot,
2323                         ino_tree_node_t *irec,
2324                         int             ino_offset,
2325                         dir_hash_tab_t  *hashtab)
2326 {
2327         struct xfs_buf          **bplist;
2328         xfs_dablk_t             da_bno;
2329         freetab_t               *freetab;
2330         int                     num_bps;
2331         int                     i;
2332         int                     isblock;
2333         int                     isleaf;
2334         xfs_fileoff_t           next_da_bno;
2335         int                     seeval;
2336         int                     fixit = 0;
2337         xfs_dir2_db_t           db;
2338         struct xfs_da_args      args;
2339
2340         *need_dot = 1;
2341         freetab = malloc(FREETAB_SIZE(ip->i_d.di_size / mp->m_dir_geo->blksize));
2342         if (!freetab) {
2343                 do_error(_("malloc failed in %s (%" PRId64 " bytes)\n"),
2344                         __func__,
2345                         FREETAB_SIZE(ip->i_d.di_size / mp->m_dir_geo->blksize));
2346                 exit(1);
2347         }
2348         freetab->naents = ip->i_d.di_size / mp->m_dir_geo->blksize;
2349         freetab->nents = 0;
2350         for (i = 0; i < freetab->naents; i++) {
2351                 freetab->ents[i].v = NULLDATAOFF;
2352                 freetab->ents[i].s = 0;
2353         }
2354         num_bps = freetab->naents;
2355         bplist = calloc(num_bps, sizeof(struct xfs_buf*));
2356         if (!bplist)
2357                 do_error(_("calloc failed in %s (%zu bytes)\n"),
2358                         __func__, num_bps * sizeof(struct xfs_buf*));
2359
2360         /* is this a block, leaf, or node directory? */
2361         args.dp = ip;
2362         args.geo = mp->m_dir_geo;
2363         libxfs_dir2_isblock(&args, &isblock);
2364         libxfs_dir2_isleaf(&args, &isleaf);
2365
2366         /* check directory "data" blocks (ie. name/inode pairs) */
2367         for (da_bno = 0, next_da_bno = 0;
2368              next_da_bno != NULLFILEOFF && da_bno < mp->m_dir_geo->leafblk;
2369              da_bno = (xfs_dablk_t)next_da_bno) {
2370                 const struct xfs_buf_ops *ops;
2371                 int                      error;
2372                 struct xfs_dir2_data_hdr *d;
2373
2374                 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2375                 if (bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK)) {
2376                         /*
2377                          * if this is the first block, there isn't anything we
2378                          * can recover so we just trash it.
2379                          */
2380                          if (da_bno == 0) {
2381                                 fixit++;
2382                                 goto out_fix;
2383                         }
2384                         break;
2385                 }
2386
2387                 db = xfs_dir2_da_to_db(mp->m_dir_geo, da_bno);
2388                 if (db >= num_bps) {
2389                         /* more data blocks than expected */
2390                         num_bps = db + 1;
2391                         bplist = realloc(bplist, num_bps * sizeof(struct xfs_buf*));
2392                         if (!bplist)
2393                                 do_error(_("realloc failed in %s (%zu bytes)\n"),
2394                                         __func__,
2395                                         num_bps * sizeof(struct xfs_buf*));
2396                 }
2397
2398                 if (isblock)
2399                         ops = &xfs_dir3_block_buf_ops;
2400                 else
2401                         ops = &xfs_dir3_data_buf_ops;
2402
2403                 error = dir_read_buf(ip, da_bno, -1, &bplist[db], ops, &fixit);
2404                 if (error) {
2405                         do_warn(
2406         _("can't read data block %u for directory inode %" PRIu64 " error %d\n"),
2407                                 da_bno, ino, error);
2408                         *num_illegal += 1;
2409
2410                         /*
2411                          * we try to read all "data" blocks, but if we are in
2412                          * block form and we fail, there isn't anything else to
2413                          * read, and nothing we can do but trash it.
2414                          */
2415                         if (isblock) {
2416                                 fixit++;
2417                                 goto out_fix;
2418                         }
2419                         continue;
2420                 }
2421
2422                 /* check v5 metadata */
2423                 d = bplist[db]->b_addr;
2424                 if (be32_to_cpu(d->magic) == XFS_DIR3_BLOCK_MAGIC ||
2425                     be32_to_cpu(d->magic) == XFS_DIR3_DATA_MAGIC) {
2426                         struct xfs_buf           *bp = bplist[db];
2427
2428                         error = check_dir3_header(mp, bp, ino);
2429                         if (error) {
2430                                 fixit++;
2431                                 continue;
2432                         }
2433                 }
2434
2435                 longform_dir2_entry_check_data(mp, ip, num_illegal, need_dot,
2436                                 irec, ino_offset, &bplist[db], hashtab,
2437                                 &freetab, da_bno, isblock);
2438         }
2439         fixit |= (*num_illegal != 0) || dir2_is_badino(ino) || *need_dot;
2440
2441         if (!dotdot_update) {
2442                 /* check btree and freespace */
2443                 if (isblock) {
2444                         struct xfs_dir2_data_hdr *block;
2445                         xfs_dir2_block_tail_t   *btp;
2446                         xfs_dir2_leaf_entry_t   *blp;
2447
2448                         block = bplist[0]->b_addr;
2449                         btp = xfs_dir2_block_tail_p(mp->m_dir_geo, block);
2450                         blp = xfs_dir2_block_leaf_p(btp);
2451                         seeval = dir_hash_see_all(hashtab, blp,
2452                                                 be32_to_cpu(btp->count),
2453                                                 be32_to_cpu(btp->stale));
2454                         if (dir_hash_check(hashtab, ip, seeval))
2455                                 fixit |= 1;
2456                 } else if (isleaf) {
2457                         fixit |= longform_dir2_check_leaf(mp, ip, hashtab,
2458                                                                 freetab);
2459                 } else {
2460                         fixit |= longform_dir2_check_node(mp, ip, hashtab,
2461                                                                 freetab);
2462                 }
2463         }
2464 out_fix:
2465         if (!no_modify && (fixit || dotdot_update)) {
2466                 dir_hash_dup_names(hashtab);
2467                 for (i = 0; i < num_bps; i++)
2468                         if (bplist[i])
2469                                 libxfs_putbuf(bplist[i]);
2470                 longform_dir2_rebuild(mp, ino, ip, irec, ino_offset, hashtab);
2471                 *num_illegal = 0;
2472                 *need_dot = 0;
2473         } else {
2474                 for (i = 0; i < num_bps; i++)
2475                         if (bplist[i])
2476                                 libxfs_putbuf(bplist[i]);
2477         }
2478
2479         free(bplist);
2480         free(freetab);
2481 }
2482
2483 /*
2484  * shortform directory v2 processing routines -- entry verification and
2485  * bad entry deletion (pruning).
2486  */
2487 static struct xfs_dir2_sf_entry *
2488 shortform_dir2_junk(
2489         struct xfs_mount        *mp,
2490         struct xfs_dir2_sf_hdr  *sfp,
2491         struct xfs_dir2_sf_entry *sfep,
2492         xfs_ino_t               lino,
2493         int                     *max_size,
2494         int                     *index,
2495         int                     *bytes_deleted,
2496         int                     *ino_dirty)
2497 {
2498         struct xfs_dir2_sf_entry *next_sfep;
2499         int                     next_len;
2500         int                     next_elen;
2501
2502         if (lino == orphanage_ino)
2503                 orphanage_ino = 0;
2504
2505         next_elen = M_DIROPS(mp)->sf_entsize(sfp, sfep->namelen);
2506         next_sfep = M_DIROPS(mp)->sf_nextentry(sfp, sfep);
2507
2508         /*
2509          * if we are just checking, simply return the pointer to the next entry
2510          * here so that the checking loop can continue.
2511          */
2512         if (no_modify) {
2513                 do_warn(_("would junk entry\n"));
2514                 return next_sfep;
2515         }
2516
2517         /*
2518          * now move all the remaining entries down over the junked entry and
2519          * clear the newly unused bytes at the tail of the directory region.
2520          */
2521         next_len = *max_size - ((intptr_t)next_sfep - (intptr_t)sfp);
2522         *max_size -= next_elen;
2523         *bytes_deleted += next_elen;
2524
2525         memmove(sfep, next_sfep, next_len);
2526         memset((void *)((intptr_t)sfep + next_len), 0, next_elen);
2527         sfp->count -= 1;
2528         *ino_dirty = 1;
2529
2530         /*
2531          * WARNING:  drop the index i by one so it matches the decremented count
2532          * for accurate comparisons in the loop test
2533          */
2534         (*index)--;
2535
2536         if (verbose)
2537                 do_warn(_("junking entry\n"));
2538         else
2539                 do_warn("\n");
2540         return sfep;
2541 }
2542
2543 static void
2544 shortform_dir2_entry_check(xfs_mount_t  *mp,
2545                         xfs_ino_t       ino,
2546                         xfs_inode_t     *ip,
2547                         int             *ino_dirty,
2548                         ino_tree_node_t *current_irec,
2549                         int             current_ino_offset,
2550                         dir_hash_tab_t  *hashtab)
2551 {
2552         xfs_ino_t               lino;
2553         xfs_ino_t               parent;
2554         struct xfs_dir2_sf_hdr  *sfp;
2555         struct xfs_dir2_sf_entry *sfep;
2556         struct xfs_dir2_sf_entry *next_sfep;
2557         struct xfs_ifork        *ifp;
2558         struct ino_tree_node    *irec;
2559         int                     max_size;
2560         int                     ino_offset;
2561         int                     i;
2562         int                     bad_sfnamelen;
2563         int                     namelen;
2564         int                     bytes_deleted;
2565         char                    fname[MAXNAMELEN + 1];
2566         int                     i8;
2567
2568         ifp = &ip->i_df;
2569         sfp = (struct xfs_dir2_sf_hdr *) ifp->if_u1.if_data;
2570         *ino_dirty = 0;
2571         bytes_deleted = 0;
2572
2573         max_size = ifp->if_bytes;
2574         ASSERT(ip->i_d.di_size <= ifp->if_bytes);
2575
2576         /*
2577          * if just rebuild a directory due to a "..", update and return
2578          */
2579         if (dotdot_update) {
2580                 parent = get_inode_parent(current_irec, current_ino_offset);
2581                 if (no_modify) {
2582                         do_warn(
2583         _("would set .. in sf dir inode %" PRIu64 " to %" PRIu64 "\n"),
2584                                 ino, parent);
2585                 } else {
2586                         do_warn(
2587         _("setting .. in sf dir inode %" PRIu64 " to %" PRIu64 "\n"),
2588                                 ino, parent);
2589                         M_DIROPS(mp)->sf_put_parent_ino(sfp, parent);
2590                         *ino_dirty = 1;
2591                 }
2592                 return;
2593         }
2594
2595         /*
2596          * no '.' entry in shortform dirs, just bump up ref count by 1
2597          * '..' was already (or will be) accounted for and checked when
2598          * the directory is reached or will be taken care of when the
2599          * directory is moved to orphanage.
2600          */
2601         add_inode_ref(current_irec, current_ino_offset);
2602
2603         /*
2604          * Initialise i8 counter -- the parent inode number counts as well.
2605          */
2606         i8 = M_DIROPS(mp)->sf_get_parent_ino(sfp) > XFS_DIR2_MAX_SHORT_INUM;
2607
2608         /*
2609          * now run through entries, stop at first bad entry, don't need
2610          * to skip over '..' since that's encoded in its own field and
2611          * no need to worry about '.' since it doesn't exist.
2612          */
2613         sfep = next_sfep = xfs_dir2_sf_firstentry(sfp);
2614
2615         for (i = 0; i < sfp->count && max_size >
2616                                         (intptr_t)next_sfep - (intptr_t)sfp;
2617                         sfep = next_sfep, i++)  {
2618                 bad_sfnamelen = 0;
2619
2620                 lino = M_DIROPS(mp)->sf_get_ino(sfp, sfep);
2621
2622                 namelen = sfep->namelen;
2623
2624                 ASSERT(no_modify || namelen > 0);
2625
2626                 if (no_modify && namelen == 0)  {
2627                         /*
2628                          * if we're really lucky, this is
2629                          * the last entry in which case we
2630                          * can use the dir size to set the
2631                          * namelen value.  otherwise, forget
2632                          * it because we're not going to be
2633                          * able to find the next entry.
2634                          */
2635                         bad_sfnamelen = 1;
2636
2637                         if (i == sfp->count - 1)  {
2638                                 namelen = ip->i_d.di_size -
2639                                         ((intptr_t) &sfep->name[0] -
2640                                          (intptr_t) sfp);
2641                         } else  {
2642                                 /*
2643                                  * don't process the rest of the directory,
2644                                  * break out of processing loop
2645                                  */
2646                                 break;
2647                         }
2648                 } else if (no_modify && (intptr_t) sfep - (intptr_t) sfp +
2649                                 + M_DIROPS(mp)->sf_entsize(sfp, sfep->namelen)
2650                                 > ip->i_d.di_size)  {
2651                         bad_sfnamelen = 1;
2652
2653                         if (i == sfp->count - 1)  {
2654                                 namelen = ip->i_d.di_size -
2655                                         ((intptr_t) &sfep->name[0] -
2656                                          (intptr_t) sfp);
2657                         } else  {
2658                                 /*
2659                                  * don't process the rest of the directory,
2660                                  * break out of processing loop
2661                                  */
2662                                 break;
2663                         }
2664                 }
2665
2666                 memmove(fname, sfep->name, sfep->namelen);
2667                 fname[sfep->namelen] = '\0';
2668
2669                 ASSERT(no_modify || (lino != NULLFSINO && lino != 0));
2670                 ASSERT(no_modify || !verify_inum(mp, lino));
2671
2672                 /*
2673                  * Also skip entries with bogus inode numbers if we're
2674                  * in no modify mode.
2675                  */
2676
2677                 if (no_modify && verify_inum(mp, lino))  {
2678                         next_sfep = M_DIROPS(mp)->sf_nextentry(sfp, sfep);
2679                         continue;
2680                 }
2681
2682                 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, lino),
2683                                         XFS_INO_TO_AGINO(mp, lino));
2684
2685                 if (irec == NULL)  {
2686                         do_warn(
2687         _("entry \"%s\" in shortform directory %" PRIu64 " references non-existent inode %" PRIu64 "\n"),
2688                                 fname, ino, lino);
2689                         next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
2690                                                 &max_size, &i, &bytes_deleted,
2691                                                 ino_dirty);
2692                         continue;
2693                 }
2694
2695                 ino_offset = XFS_INO_TO_AGINO(mp, lino) - irec->ino_startnum;
2696
2697                 /*
2698                  * if it's a free inode, blow out the entry.
2699                  * by now, any inode that we think is free
2700                  * really is free.
2701                  */
2702                 if (is_inode_free(irec, ino_offset))  {
2703                         do_warn(
2704         _("entry \"%s\" in shortform directory inode %" PRIu64 " points to free inode %" PRIu64 "\n"),
2705                                 fname, ino, lino);
2706                         next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
2707                                                 &max_size, &i, &bytes_deleted,
2708                                                 ino_dirty);
2709                         continue;
2710                 }
2711                 /*
2712                  * check if this inode is lost+found dir in the root
2713                  */
2714                 if (ino == mp->m_sb.sb_rootino && strcmp(fname, ORPHANAGE) == 0) {
2715                         /*
2716                          * if it's not a directory, trash it
2717                          */
2718                         if (!inode_isadir(irec, ino_offset)) {
2719                                 do_warn(
2720         _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"),
2721                                         ORPHANAGE, lino, ino);
2722                                 next_sfep = shortform_dir2_junk(mp, sfp, sfep,
2723                                                 lino, &max_size, &i,
2724                                                 &bytes_deleted, ino_dirty);
2725                                 continue;
2726                         }
2727                         /*
2728                          * if this is a dup, it will be picked up below,
2729                          * otherwise, mark it as the orphanage for later.
2730                          */
2731                         if (!orphanage_ino)
2732                                 orphanage_ino = lino;
2733                 }
2734                 /*
2735                  * check for duplicate names in directory.
2736                  */
2737                 if (!dir_hash_add(mp, hashtab, (xfs_dir2_dataptr_t)
2738                                 (sfep - xfs_dir2_sf_firstentry(sfp)),
2739                                 lino, sfep->namelen, sfep->name,
2740                                 M_DIROPS(mp)->sf_get_ftype(sfep))) {
2741                         do_warn(
2742 _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"),
2743                                 fname, lino, ino);
2744                         next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
2745                                                 &max_size, &i, &bytes_deleted,
2746                                                 ino_dirty);
2747                         continue;
2748                 }
2749
2750                 if (!inode_isadir(irec, ino_offset))  {
2751                         /*
2752                          * check easy case first, regular inode, just bump
2753                          * the link count
2754                          */
2755                         add_inode_reached(irec, ino_offset);
2756                 } else  {
2757                         parent = get_inode_parent(irec, ino_offset);
2758
2759                         /*
2760                          * bump up the link counts in parent and child.
2761                          * directory but if the link doesn't agree with
2762                          * the .. in the child, blow out the entry
2763                          */
2764                         if (is_inode_reached(irec, ino_offset))  {
2765                                 do_warn(
2766         _("entry \"%s\" in directory inode %" PRIu64
2767           " references already connected inode %" PRIu64 ".\n"),
2768                                         fname, ino, lino);
2769                                 next_sfep = shortform_dir2_junk(mp, sfp, sfep,
2770                                                 lino, &max_size, &i,
2771                                                 &bytes_deleted, ino_dirty);
2772                                 continue;
2773                         } else if (parent == ino)  {
2774                                 add_inode_reached(irec, ino_offset);
2775                                 add_inode_ref(current_irec, current_ino_offset);
2776                         } else if (parent == NULLFSINO) {
2777                                 /* ".." was missing, but this entry refers to it,
2778                                 so, set it as the parent and mark for rebuild */
2779                                 do_warn(
2780         _("entry \"%s\" in dir ino %" PRIu64 " doesn't have a .. entry, will set it in ino %" PRIu64 ".\n"),
2781                                         fname, ino, lino);
2782                                 set_inode_parent(irec, ino_offset, ino);
2783                                 add_inode_reached(irec, ino_offset);
2784                                 add_inode_ref(current_irec, current_ino_offset);
2785                                 add_dotdot_update(XFS_INO_TO_AGNO(mp, lino),
2786                                                         irec, ino_offset);
2787                         } else  {
2788                                 do_warn(
2789         _("entry \"%s\" in directory inode %" PRIu64
2790           " not consistent with .. value (%" PRIu64
2791           ") in inode %" PRIu64 ",\n"),
2792                                         fname, ino, parent, lino);
2793                                 next_sfep = shortform_dir2_junk(mp, sfp, sfep,
2794                                                 lino, &max_size, &i,
2795                                                 &bytes_deleted, ino_dirty);
2796                                 continue;
2797                         }
2798                 }
2799
2800                 /* validate ftype field if supported */
2801                 if (xfs_sb_version_hasftype(&mp->m_sb)) {
2802                         uint8_t dir_ftype;
2803                         uint8_t ino_ftype;
2804
2805                         dir_ftype = M_DIROPS(mp)->sf_get_ftype(sfep);
2806                         ino_ftype = get_inode_ftype(irec, ino_offset);
2807
2808                         if (dir_ftype != ino_ftype) {
2809                                 if (no_modify) {
2810                                         do_warn(
2811         _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
2812                                                 dir_ftype, ino_ftype,
2813                                                 ino, lino);
2814                                 } else {
2815                                         do_warn(
2816         _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
2817                                                 dir_ftype, ino_ftype,
2818                                                 ino, lino);
2819                                         M_DIROPS(mp)->sf_put_ftype(sfep,
2820                                                                 ino_ftype);
2821                                         dir_hash_update_ftype(hashtab,
2822                         (xfs_dir2_dataptr_t)(sfep - xfs_dir2_sf_firstentry(sfp)),
2823                                                               ino_ftype);
2824                                         *ino_dirty = 1;
2825                                 }
2826                         }
2827                 }
2828
2829                 if (lino > XFS_DIR2_MAX_SHORT_INUM)
2830                         i8++;
2831
2832                 /*
2833                  * go onto next entry - we have to take entries with bad namelen
2834                  * into account in no modify mode since we calculate size based
2835                  * on next_sfep.
2836                  */
2837                 ASSERT(no_modify || bad_sfnamelen == 0);
2838                 next_sfep = (struct xfs_dir2_sf_entry *)((intptr_t)sfep +
2839                               (bad_sfnamelen
2840                                 ? M_DIROPS(mp)->sf_entsize(sfp, namelen)
2841                                 : M_DIROPS(mp)->sf_entsize(sfp, sfep->namelen)));
2842         }
2843
2844         if (sfp->i8count != i8) {
2845                 if (no_modify) {
2846                         do_warn(_("would fix i8count in inode %" PRIu64 "\n"),
2847                                 ino);
2848                 } else {
2849                         if (i8 == 0) {
2850                                 struct xfs_dir2_sf_entry *tmp_sfep;
2851
2852                                 tmp_sfep = next_sfep;
2853                                 process_sf_dir2_fixi8(mp, sfp, &tmp_sfep);
2854                                 bytes_deleted +=
2855                                         (intptr_t)next_sfep -
2856                                         (intptr_t)tmp_sfep;
2857                                 next_sfep = tmp_sfep;
2858                         } else
2859                                 sfp->i8count = i8;
2860                         *ino_dirty = 1;
2861                         do_warn(_("fixing i8count in inode %" PRIu64 "\n"),
2862                                 ino);
2863                 }
2864         }
2865
2866         /*
2867          * sync up sizes if required
2868          */
2869         if (*ino_dirty && bytes_deleted > 0)  {
2870                 ASSERT(!no_modify);
2871                 libxfs_idata_realloc(ip, -bytes_deleted, XFS_DATA_FORK);
2872                 ip->i_d.di_size -= bytes_deleted;
2873         }
2874
2875         if (ip->i_d.di_size != ip->i_df.if_bytes)  {
2876                 ASSERT(ip->i_df.if_bytes == (xfs_fsize_t)
2877                                 ((intptr_t) next_sfep - (intptr_t) sfp));
2878                 ip->i_d.di_size = (xfs_fsize_t)
2879                                 ((intptr_t) next_sfep - (intptr_t) sfp);
2880                 do_warn(
2881         _("setting size to %" PRId64 " bytes to reflect junked entries\n"),
2882                         ip->i_d.di_size);
2883                 *ino_dirty = 1;
2884         }
2885 }
2886
2887 /*
2888  * processes all reachable inodes in directories
2889  */
2890 static void
2891 process_dir_inode(
2892         xfs_mount_t             *mp,
2893         xfs_agnumber_t          agno,
2894         ino_tree_node_t         *irec,
2895         int                     ino_offset)
2896 {
2897         xfs_ino_t               ino;
2898         struct xfs_defer_ops            dfops;
2899         xfs_fsblock_t           first;
2900         xfs_inode_t             *ip;
2901         xfs_trans_t             *tp;
2902         dir_hash_tab_t          *hashtab;
2903         int                     need_dot;
2904         int                     dirty, num_illegal, error, nres;
2905
2906         ino = XFS_AGINO_TO_INO(mp, agno, irec->ino_startnum + ino_offset);
2907
2908         /*
2909          * open up directory inode, check all entries,
2910          * then call prune_dir_entries to remove all
2911          * remaining illegal directory entries.
2912          */
2913
2914         ASSERT(!is_inode_refchecked(irec, ino_offset) || dotdot_update);
2915
2916         error = -libxfs_iget(mp, NULL, ino, 0, &ip, &phase6_ifork_ops);
2917         if (error) {
2918                 if (!no_modify)
2919                         do_error(
2920         _("couldn't map inode %" PRIu64 ", err = %d\n"),
2921                                 ino, error);
2922                 else  {
2923                         do_warn(
2924         _("couldn't map inode %" PRIu64 ", err = %d\n"),
2925                                 ino, error);
2926                         /*
2927                          * see below for what we're doing if this
2928                          * is root.  Why do we need to do this here?
2929                          * to ensure that the root doesn't show up
2930                          * as being disconnected in the no_modify case.
2931                          */
2932                         if (mp->m_sb.sb_rootino == ino)  {
2933                                 add_inode_reached(irec, 0);
2934                                 add_inode_ref(irec, 0);
2935                         }
2936                 }
2937
2938                 add_inode_refchecked(irec, 0);
2939                 return;
2940         }
2941
2942         need_dot = dirty = num_illegal = 0;
2943
2944         if (mp->m_sb.sb_rootino == ino)  {
2945                 /*
2946                  * mark root inode reached and bump up
2947                  * link count for root inode to account
2948                  * for '..' entry since the root inode is
2949                  * never reached by a parent.  we know
2950                  * that root's '..' is always good --
2951                  * guaranteed by phase 3 and/or below.
2952                  */
2953                 add_inode_reached(irec, ino_offset);
2954         }
2955
2956         add_inode_refchecked(irec, ino_offset);
2957
2958         hashtab = dir_hash_init(ip->i_d.di_size);
2959
2960         /*
2961          * look for bogus entries
2962          */
2963         switch (ip->i_d.di_format)  {
2964                 case XFS_DINODE_FMT_EXTENTS:
2965                 case XFS_DINODE_FMT_BTREE:
2966                         /*
2967                          * also check for missing '.' in longform dirs.
2968                          * missing .. entries are added if required when
2969                          * the directory is connected to lost+found. but
2970                          * we need to create '.' entries here.
2971                          */
2972                         longform_dir2_entry_check(mp, ino, ip,
2973                                                 &num_illegal, &need_dot,
2974                                                 irec, ino_offset,
2975                                                 hashtab);
2976                         break;
2977
2978                 case XFS_DINODE_FMT_LOCAL:
2979                         /*
2980                          * using the remove reservation is overkill
2981                          * since at most we'll only need to log the
2982                          * inode but it's easier than wedging a
2983                          * new define in ourselves.
2984                          */
2985                         nres = no_modify ? 0 : XFS_REMOVE_SPACE_RES(mp);
2986                         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove,
2987                                                     nres, 0, 0, &tp);
2988                         if (error)
2989                                 res_failed(error);
2990
2991                         libxfs_trans_ijoin(tp, ip, 0);
2992
2993                         shortform_dir2_entry_check(mp, ino, ip, &dirty,
2994                                                 irec, ino_offset,
2995                                                 hashtab);
2996
2997                         ASSERT(dirty == 0 || (dirty && !no_modify));
2998                         if (dirty)  {
2999                                 libxfs_trans_log_inode(tp, ip,
3000                                         XFS_ILOG_CORE | XFS_ILOG_DDATA);
3001                                 libxfs_trans_commit(tp);
3002                         } else  {
3003                                 libxfs_trans_cancel(tp);
3004                         }
3005                         break;
3006
3007                 default:
3008                         break;
3009         }
3010         dir_hash_done(hashtab);
3011
3012         /*
3013          * if we have to create a .. for /, do it now *before*
3014          * we delete the bogus entries, otherwise the directory
3015          * could transform into a shortform dir which would
3016          * probably cause the simulation to choke.  Even
3017          * if the illegal entries get shifted around, it's ok
3018          * because the entries are structurally intact and in
3019          * in hash-value order so the simulation won't get confused
3020          * if it has to move them around.
3021          */
3022         if (!no_modify && need_root_dotdot && ino == mp->m_sb.sb_rootino)  {
3023                 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_LOCAL);
3024
3025                 do_warn(_("recreating root directory .. entry\n"));
3026
3027                 nres = XFS_MKDIR_SPACE_RES(mp, 2);
3028                 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir,
3029                                             nres, 0, 0, &tp);
3030                 if (error)
3031                         res_failed(error);
3032
3033                 libxfs_trans_ijoin(tp, ip, 0);
3034
3035                 libxfs_defer_init(tp, &dfops, &first);
3036
3037                 error = -libxfs_dir_createname(tp, ip, &xfs_name_dotdot,
3038                                         ip->i_ino, nres);
3039                 if (error)
3040                         do_error(
3041         _("can't make \"..\" entry in root inode %" PRIu64 ", createname error %d\n"), ino, error);
3042
3043                 libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
3044
3045                 libxfs_defer_ijoin(&dfops, ip);
3046                 error = -libxfs_defer_finish(&tp, &dfops);
3047                 ASSERT(error == 0);
3048                 libxfs_trans_commit(tp);
3049
3050                 need_root_dotdot = 0;
3051         } else if (need_root_dotdot && ino == mp->m_sb.sb_rootino)  {
3052                 do_warn(_("would recreate root directory .. entry\n"));
3053         }
3054
3055         /*
3056          * if we need to create the '.' entry, do so only if
3057          * the directory is a longform dir.  if it's been
3058          * turned into a shortform dir, then the inode is ok
3059          * since shortform dirs have no '.' entry and the inode
3060          * has already been committed by prune_lf_dir_entry().
3061          */
3062         if (need_dot)  {
3063                 /*
3064                  * bump up our link count but don't
3065                  * bump up the inode link count.  chances
3066                  * are good that even though we lost '.'
3067                  * the inode link counts reflect '.' so
3068                  * leave the inode link count alone and if
3069                  * it turns out to be wrong, we'll catch
3070                  * that in phase 7.
3071                  */
3072                 add_inode_ref(irec, ino_offset);
3073
3074                 if (no_modify)  {
3075                         do_warn(
3076         _("would create missing \".\" entry in dir ino %" PRIu64 "\n"),
3077                                 ino);
3078                 } else if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)  {
3079                         /*
3080                          * need to create . entry in longform dir.
3081                          */
3082                         do_warn(
3083         _("creating missing \".\" entry in dir ino %" PRIu64 "\n"), ino);
3084
3085                         nres = XFS_MKDIR_SPACE_RES(mp, 1);
3086                         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir,
3087                                                     nres, 0, 0, &tp);
3088                         if (error)
3089                                 res_failed(error);
3090
3091                         libxfs_trans_ijoin(tp, ip, 0);
3092
3093                         libxfs_defer_init(tp, &dfops, &first);
3094
3095                         error = -libxfs_dir_createname(tp, ip, &xfs_name_dot,
3096                                         ip->i_ino, nres);
3097                         if (error)
3098                                 do_error(
3099         _("can't make \".\" entry in dir ino %" PRIu64 ", createname error %d\n"),
3100                                         ino, error);
3101
3102                         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
3103
3104                         libxfs_defer_ijoin(&dfops, ip);
3105                         error = -libxfs_defer_finish(&tp, &dfops);
3106                         ASSERT(error == 0);
3107                         libxfs_trans_commit(tp);
3108                 }
3109         }
3110         IRELE(ip);
3111 }
3112
3113 /*
3114  * mark realtime bitmap and summary inodes as reached.
3115  * quota inode will be marked here as well
3116  */
3117 static void
3118 mark_standalone_inodes(xfs_mount_t *mp)
3119 {
3120         ino_tree_node_t         *irec;
3121         int                     offset;
3122
3123         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rbmino),
3124                         XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rbmino));
3125
3126         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rbmino) -
3127                         irec->ino_startnum;
3128
3129         add_inode_reached(irec, offset);
3130
3131         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rsumino),
3132                         XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rsumino));
3133
3134         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rsumino) -
3135                         irec->ino_startnum;
3136
3137         add_inode_reached(irec, offset);
3138
3139         if (fs_quotas)  {
3140                 if (mp->m_sb.sb_uquotino
3141                                 && mp->m_sb.sb_uquotino != NULLFSINO)  {
3142                         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
3143                                                 mp->m_sb.sb_uquotino),
3144                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino));
3145                         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino)
3146                                         - irec->ino_startnum;
3147                         add_inode_reached(irec, offset);
3148                 }
3149                 if (mp->m_sb.sb_gquotino
3150                                 && mp->m_sb.sb_gquotino != NULLFSINO)  {
3151                         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
3152                                                 mp->m_sb.sb_gquotino),
3153                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_gquotino));
3154                         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_gquotino)
3155                                         - irec->ino_startnum;
3156                         add_inode_reached(irec, offset);
3157                 }
3158                 if (mp->m_sb.sb_pquotino
3159                                 && mp->m_sb.sb_pquotino != NULLFSINO)  {
3160                         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
3161                                                 mp->m_sb.sb_pquotino),
3162                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino));
3163                         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino)
3164                                         - irec->ino_startnum;
3165                         add_inode_reached(irec, offset);
3166                 }
3167         }
3168 }
3169
3170 static void
3171 check_for_orphaned_inodes(
3172         xfs_mount_t             *mp,
3173         xfs_agnumber_t          agno,
3174         ino_tree_node_t         *irec)
3175 {
3176         int                     i;
3177         xfs_ino_t               ino;
3178
3179         for (i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
3180                 ASSERT(is_inode_confirmed(irec, i));
3181                 if (is_inode_free(irec, i))
3182                         continue;
3183
3184                 if (is_inode_reached(irec, i))
3185                         continue;
3186
3187                 ASSERT(inode_isadir(irec, i) ||
3188                         num_inode_references(irec, i) == 0);
3189
3190                 ino = XFS_AGINO_TO_INO(mp, agno, i + irec->ino_startnum);
3191                 if (inode_isadir(irec, i))
3192                         do_warn(_("disconnected dir inode %" PRIu64 ", "), ino);
3193                 else
3194                         do_warn(_("disconnected inode %" PRIu64 ", "), ino);
3195                 if (!no_modify)  {
3196                         if (!orphanage_ino)
3197                                 orphanage_ino = mk_orphanage(mp);
3198                         do_warn(_("moving to %s\n"), ORPHANAGE);
3199                         mv_orphanage(mp, ino, inode_isadir(irec, i));
3200                 } else  {
3201                         do_warn(_("would move to %s\n"), ORPHANAGE);
3202                 }
3203                 /*
3204                  * for read-only case, even though the inode isn't
3205                  * really reachable, set the flag (and bump our link
3206                  * count) anyway to fool phase 7
3207                  */
3208                 add_inode_reached(irec, i);
3209         }
3210 }
3211
3212 static void
3213 traverse_function(
3214         struct workqueue        *wq,
3215         xfs_agnumber_t          agno,
3216         void                    *arg)
3217 {
3218         ino_tree_node_t         *irec;
3219         int                     i;
3220         prefetch_args_t         *pf_args = arg;
3221
3222         wait_for_inode_prefetch(pf_args);
3223
3224         if (verbose)
3225                 do_log(_("        - agno = %d\n"), agno);
3226
3227         for (irec = findfirst_inode_rec(agno); irec; irec = next_ino_rec(irec)) {
3228                 if (irec->ino_isa_dir == 0)
3229                         continue;
3230
3231                 if (pf_args) {
3232                         sem_post(&pf_args->ra_count);
3233 #ifdef XR_PF_TRACE
3234                         sem_getvalue(&pf_args->ra_count, &i);
3235                         pftrace(
3236                 "processing inode chunk %p in AG %d (sem count = %d)",
3237                                 irec, agno, i);
3238 #endif
3239                 }
3240
3241                 for (i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
3242                         if (inode_isadir(irec, i))
3243                                 process_dir_inode(wq->wq_ctx, agno, irec, i);
3244                 }
3245         }
3246         cleanup_inode_prefetch(pf_args);
3247 }
3248
3249 static void
3250 update_missing_dotdot_entries(
3251         xfs_mount_t             *mp)
3252 {
3253         dotdot_update_t         *dir;
3254
3255         /*
3256          * these entries parents were updated, rebuild them again
3257          * set dotdot_update flag so processing routines do not count links
3258          */
3259         dotdot_update = 1;
3260         while (!list_empty(&dotdot_update_list)) {
3261                 dir = list_entry(dotdot_update_list.prev, struct dotdot_update,
3262                                  list);
3263                 list_del(&dir->list);
3264                 process_dir_inode(mp, dir->agno, dir->irec, dir->ino_offset);
3265                 free(dir);
3266         }
3267 }
3268
3269 static void
3270 traverse_ags(
3271         struct xfs_mount        *mp)
3272 {
3273         do_inode_prefetch(mp, 0, traverse_function, false, true);
3274 }
3275
3276 void
3277 phase6(xfs_mount_t *mp)
3278 {
3279         ino_tree_node_t         *irec;
3280         int                     i;
3281
3282         memset(&zerocr, 0, sizeof(struct cred));
3283         memset(&zerofsx, 0, sizeof(struct fsxattr));
3284         orphanage_ino = 0;
3285
3286         do_log(_("Phase 6 - check inode connectivity...\n"));
3287
3288         incore_ext_teardown(mp);
3289
3290         add_ino_ex_data(mp);
3291
3292         /*
3293          * verify existence of root directory - if we have to
3294          * make one, it's ok for the incore data structs not to
3295          * know about it since everything about it (and the other
3296          * inodes in its chunk if a new chunk was created) are ok
3297          */
3298         if (need_root_inode)  {
3299                 if (!no_modify)  {
3300                         do_warn(_("reinitializing root directory\n"));
3301                         mk_root_dir(mp);
3302                         need_root_inode = 0;
3303                         need_root_dotdot = 0;
3304                 } else  {
3305                         do_warn(_("would reinitialize root directory\n"));
3306                 }
3307         }
3308
3309         if (need_rbmino)  {
3310                 if (!no_modify)  {
3311                         do_warn(_("reinitializing realtime bitmap inode\n"));
3312                         mk_rbmino(mp);
3313                         need_rbmino = 0;
3314                 } else  {
3315                         do_warn(_("would reinitialize realtime bitmap inode\n"));
3316                 }
3317         }
3318
3319         if (need_rsumino)  {
3320                 if (!no_modify)  {
3321                         do_warn(_("reinitializing realtime summary inode\n"));
3322                         mk_rsumino(mp);
3323                         need_rsumino = 0;
3324                 } else  {
3325                         do_warn(_("would reinitialize realtime summary inode\n"));
3326                 }
3327         }
3328
3329         if (!no_modify)  {
3330                 do_log(
3331 _("        - resetting contents of realtime bitmap and summary inodes\n"));
3332                 if (fill_rbmino(mp))  {
3333                         do_warn(
3334                         _("Warning:  realtime bitmap may be inconsistent\n"));
3335                 }
3336
3337                 if (fill_rsumino(mp))  {
3338                         do_warn(
3339                         _("Warning:  realtime bitmap may be inconsistent\n"));
3340                 }
3341         }
3342
3343         mark_standalone_inodes(mp);
3344
3345         do_log(_("        - traversing filesystem ...\n"));
3346
3347         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
3348                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
3349
3350         /*
3351          * we always have a root inode, even if it's free...
3352          * if the root is free, forget it, lost+found is already gone
3353          */
3354         if (is_inode_free(irec, 0) || !inode_isadir(irec, 0))  {
3355                 need_root_inode = 1;
3356         }
3357
3358         /*
3359          * then process all inodes by walking incore inode tree
3360          */
3361         traverse_ags(mp);
3362
3363         /*
3364          * any directories that had updated ".." entries, rebuild them now
3365          */
3366         update_missing_dotdot_entries(mp);
3367
3368         do_log(_("        - traversal finished ...\n"));
3369         do_log(_("        - moving disconnected inodes to %s ...\n"),
3370                 ORPHANAGE);
3371
3372         /*
3373          * move all disconnected inodes to the orphanage
3374          */
3375         for (i = 0; i < glob_agcount; i++)  {
3376                 irec = findfirst_inode_rec(i);
3377                 while (irec != NULL)  {
3378                         check_for_orphaned_inodes(mp, i, irec);
3379                         irec = next_ino_rec(irec);
3380                 }
3381         }
3382 }