repair/phase6.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
   4  * All Rights Reserved.
   5  */
   6
   7 #include "libxfs.h"
   8 #include "threads.h"
   9 #include "prefetch.h"
  10 #include "avl.h"
  11 #include "globals.h"
  12 #include "agheader.h"
  13 #include "incore.h"
  14 #include "dir2.h"
  15 #include "protos.h"
  16 #include "err_protos.h"
  17 #include "dinode.h"
  18 #include "progress.h"
  19 #include "versions.h"
  20
  21 static struct cred              zerocr;
  22 static struct fsxattr           zerofsx;
  23 static xfs_ino_t                orphanage_ino;
  24
  25 static struct xfs_name          xfs_name_dot = {(unsigned char *)".",
  26                                                 1,
  27                                                 XFS_DIR3_FT_DIR};
  28
  29 /*
  30  * When we're checking directory inodes, we're allowed to set a directory's
  31  * dotdot entry to zero to signal that the parent needs to be reconnected
  32  * during phase 6.  If we're handling a shortform directory the ifork
  33  * verifiers will fail, so temporarily patch out this canary so that we can
  34  * verify the rest of the fork and move on to fixing the dir.
  35  */
  36 static xfs_failaddr_t
  37 phase6_verify_dir(
  38         struct xfs_inode                *ip)
  39 {
  40         struct xfs_mount                *mp = ip->i_mount;
  41         const struct xfs_dir_ops        *dops;
  42         struct xfs_ifork                *ifp;
  43         struct xfs_dir2_sf_hdr          *sfp;
  44         xfs_failaddr_t                  fa;
  45         xfs_ino_t                       old_parent;
  46         bool                            parent_bypass = false;
  47         int                             size;
  48
  49         dops = libxfs_dir_get_ops(mp, NULL);
  50
  51         ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
  52         sfp = (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data;
  53         size = ifp->if_bytes;
  54
  55         /*
  56          * If this is a shortform directory, phase4 may have set the parent
  57          * inode to zero to indicate that it must be fixed.  Temporarily
  58          * set a valid parent so that the directory verifier will pass.
  59          */
  60         if (size > offsetof(struct xfs_dir2_sf_hdr, parent) &&
  61             size >= xfs_dir2_sf_hdr_size(sfp->i8count)) {
  62                 old_parent = dops->sf_get_parent_ino(sfp);
  63                 if (old_parent == 0) {
  64                         dops->sf_put_parent_ino(sfp, mp->m_sb.sb_rootino);
  65                         parent_bypass = true;
  66                 }
  67         }
  68
  69         fa = libxfs_default_ifork_ops.verify_dir(ip);
  70
  71         /* Put it back. */
  72         if (parent_bypass)
  73                 dops->sf_put_parent_ino(sfp, old_parent);
  74
  75         return fa;
  76 }
  77
  78 static struct xfs_ifork_ops phase6_ifork_ops = {
  79         .verify_attr    = xfs_attr_shortform_verify,
  80         .verify_dir     = phase6_verify_dir,
  81         .verify_symlink = xfs_symlink_shortform_verify,
  82 };
  83
  84 /*
  85  * Data structures used to keep track of directories where the ".."
  86  * entries are updated. These must be rebuilt after the initial pass
  87  */
  88 typedef struct dotdot_update {
  89         struct list_head        list;
  90         ino_tree_node_t         *irec;
  91         xfs_agnumber_t          agno;
  92         int                     ino_offset;
  93 } dotdot_update_t;
  94
  95 static LIST_HEAD(dotdot_update_list);
  96 static int                      dotdot_update;
  97
  98 static void
  99 add_dotdot_update(
 100         xfs_agnumber_t          agno,
 101         ino_tree_node_t         *irec,
 102         int                     ino_offset)
 103 {
 104         dotdot_update_t         *dir = malloc(sizeof(dotdot_update_t));
 105
 106         if (!dir)
 107                 do_error(_("malloc failed add_dotdot_update (%zu bytes)\n"),
 108                         sizeof(dotdot_update_t));
 109
 110         INIT_LIST_HEAD(&dir->list);
 111         dir->irec = irec;
 112         dir->agno = agno;
 113         dir->ino_offset = ino_offset;
 114
 115         list_add(&dir->list, &dotdot_update_list);
 116 }
 117
 118 /*
 119  * Data structures and routines to keep track of directory entries
 120  * and whether their leaf entry has been seen. Also used for name
 121  * duplicate checking and rebuilding step if required.
 122  */
 123 typedef struct dir_hash_ent {
 124         struct dir_hash_ent     *nextbyaddr;    /* next in addr bucket */
 125         struct dir_hash_ent     *nextbyhash;    /* next in name bucket */
 126         struct dir_hash_ent     *nextbyorder;   /* next in order added */
 127         xfs_dahash_t            hashval;        /* hash value of name */
 128         uint32_t                address;        /* offset of data entry */
 129         xfs_ino_t               inum;           /* inode num of entry */
 130         short                   junkit;         /* name starts with / */
 131         short                   seen;           /* have seen leaf entry */
 132         struct xfs_name         name;
 133 } dir_hash_ent_t;
 134
 135 typedef struct dir_hash_tab {
 136         int                     size;           /* size of hash tables */
 137         int                     names_duped;    /* 1 = ent names malloced */
 138         dir_hash_ent_t          *first;         /* ptr to first added entry */
 139         dir_hash_ent_t          *last;          /* ptr to last added entry */
 140         dir_hash_ent_t          **byhash;       /* ptr to name hash buckets */
 141         dir_hash_ent_t          **byaddr;       /* ptr to addr hash buckets */
 142 } dir_hash_tab_t;
 143
 144 #define DIR_HASH_TAB_SIZE(n)    \
 145         (sizeof(dir_hash_tab_t) + (sizeof(dir_hash_ent_t *) * (n) * 2))
 146 #define DIR_HASH_FUNC(t,a)      ((a) % (t)->size)
 147
 148 /*
 149  * Track the contents of the freespace table in a directory.
 150  */
 151 typedef struct freetab {
 152         int                     naents; /* expected number of data blocks */
 153         int                     nents;  /* number of data blocks processed */
 154         struct freetab_ent {
 155                 xfs_dir2_data_off_t     v;
 156                 short                   s;
 157         } ents[1];
 158 } freetab_t;
 159 #define FREETAB_SIZE(n) \
 160         (offsetof(freetab_t, ents) + (sizeof(struct freetab_ent) * (n)))
 161
 162 #define DIR_HASH_CK_OK          0
 163 #define DIR_HASH_CK_DUPLEAF     1
 164 #define DIR_HASH_CK_BADHASH     2
 165 #define DIR_HASH_CK_NODATA      3
 166 #define DIR_HASH_CK_NOLEAF      4
 167 #define DIR_HASH_CK_BADSTALE    5
 168 #define DIR_HASH_CK_TOTAL       6
 169
 170 /*
 171  * Need to handle CRC and validation errors specially here. If there is a
 172  * validator error, re-read without the verifier so that we get a buffer we can
 173  * check and repair. Re-attach the ops to the buffer after the read so that when
 174  * it is rewritten the CRC is recalculated.
 175  *
 176  * If the buffer was not read, we return an error. If the buffer was read but
 177  * had a CRC or corruption error, we reread it without the verifier and if it is
 178  * read successfully we increment *crc_error and return 0. Otherwise we
 179  * return the read error.
 180  */
 181 static int
 182 dir_read_buf(
 183         struct xfs_inode        *ip,
 184         xfs_dablk_t             bno,
 185         xfs_daddr_t             mappedbno,
 186         struct xfs_buf          **bpp,
 187         const struct xfs_buf_ops *ops,
 188         int                     *crc_error)
 189 {
 190         int error;
 191         int error2;
 192
 193         error = -libxfs_da_read_buf(NULL, ip, bno, mappedbno, bpp,
 194                                    XFS_DATA_FORK, ops);
 195
 196         if (error != EFSBADCRC && error != EFSCORRUPTED)
 197                 return error;
 198
 199         error2 = -libxfs_da_read_buf(NULL, ip, bno, mappedbno, bpp,
 200                                    XFS_DATA_FORK, NULL);
 201         if (error2)
 202                 return error2;
 203
 204         (*crc_error)++;
 205         (*bpp)->b_ops = ops;
 206         return 0;
 207 }
 208
 209 /*
 210  * Returns 0 if the name already exists (ie. a duplicate)
 211  */
 212 static int
 213 dir_hash_add(
 214         xfs_mount_t             *mp,
 215         dir_hash_tab_t          *hashtab,
 216         uint32_t                addr,
 217         xfs_ino_t               inum,
 218         int                     namelen,
 219         unsigned char           *name,
 220         uint8_t                 ftype)
 221 {
 222         xfs_dahash_t            hash = 0;
 223         int                     byaddr;
 224         int                     byhash = 0;
 225         dir_hash_ent_t          *p;
 226         int                     dup;
 227         short                   junk;
 228         struct xfs_name         xname;
 229
 230         ASSERT(!hashtab->names_duped);
 231
 232         xname.name = name;
 233         xname.len = namelen;
 234         xname.type = ftype;
 235
 236         junk = name[0] == '/';
 237         byaddr = DIR_HASH_FUNC(hashtab, addr);
 238         dup = 0;
 239
 240         if (!junk) {
 241                 hash = mp->m_dirnameops->hashname(&xname);
 242                 byhash = DIR_HASH_FUNC(hashtab, hash);
 243
 244                 /*
 245                  * search hash bucket for existing name.
 246                  */
 247                 for (p = hashtab->byhash[byhash]; p; p = p->nextbyhash) {
 248                         if (p->hashval == hash && p->name.len == namelen) {
 249                                 if (memcmp(p->name.name, name, namelen) == 0) {
 250                                         dup = 1;
 251                                         junk = 1;
 252                                         break;
 253                                 }
 254                         }
 255                 }
 256         }
 257
 258         if ((p = malloc(sizeof(*p))) == NULL)
 259                 do_error(_("malloc failed in dir_hash_add (%zu bytes)\n"),
 260                         sizeof(*p));
 261
 262         p->nextbyaddr = hashtab->byaddr[byaddr];
 263         hashtab->byaddr[byaddr] = p;
 264         if (hashtab->last)
 265                 hashtab->last->nextbyorder = p;
 266         else
 267                 hashtab->first = p;
 268         p->nextbyorder = NULL;
 269         hashtab->last = p;
 270
 271         if (!(p->junkit = junk)) {
 272                 p->hashval = hash;
 273                 p->nextbyhash = hashtab->byhash[byhash];
 274                 hashtab->byhash[byhash] = p;
 275         }
 276         p->address = addr;
 277         p->inum = inum;
 278         p->seen = 0;
 279         p->name = xname;
 280
 281         return !dup;
 282 }
 283
 284 /*
 285  * checks to see if any data entries are not in the leaf blocks
 286  */
 287 static int
 288 dir_hash_unseen(
 289         dir_hash_tab_t  *hashtab)
 290 {
 291         int             i;
 292         dir_hash_ent_t  *p;
 293
 294         for (i = 0; i < hashtab->size; i++) {
 295                 for (p = hashtab->byaddr[i]; p; p = p->nextbyaddr) {
 296                         if (p->seen == 0)
 297                                 return 1;
 298                 }
 299         }
 300         return 0;
 301 }
 302
 303 static int
 304 dir_hash_check(
 305         dir_hash_tab_t  *hashtab,
 306         xfs_inode_t     *ip,
 307         int             seeval)
 308 {
 309         static char     *seevalstr[DIR_HASH_CK_TOTAL];
 310         static int      done;
 311
 312         if (!done) {
 313                 seevalstr[DIR_HASH_CK_OK] = _("ok");
 314                 seevalstr[DIR_HASH_CK_DUPLEAF] = _("duplicate leaf");
 315                 seevalstr[DIR_HASH_CK_BADHASH] = _("hash value mismatch");
 316                 seevalstr[DIR_HASH_CK_NODATA] = _("no data entry");
 317                 seevalstr[DIR_HASH_CK_NOLEAF] = _("no leaf entry");
 318                 seevalstr[DIR_HASH_CK_BADSTALE] = _("bad stale count");
 319                 done = 1;
 320         }
 321
 322         if (seeval == DIR_HASH_CK_OK && dir_hash_unseen(hashtab))
 323                 seeval = DIR_HASH_CK_NOLEAF;
 324         if (seeval == DIR_HASH_CK_OK)
 325                 return 0;
 326         do_warn(_("bad hash table for directory inode %" PRIu64 " (%s): "),
 327                 ip->i_ino, seevalstr[seeval]);
 328         if (!no_modify)
 329                 do_warn(_("rebuilding\n"));
 330         else
 331                 do_warn(_("would rebuild\n"));
 332         return 1;
 333 }
 334
 335 static void
 336 dir_hash_done(
 337         dir_hash_tab_t  *hashtab)
 338 {
 339         int             i;
 340         dir_hash_ent_t  *n;
 341         dir_hash_ent_t  *p;
 342
 343         for (i = 0; i < hashtab->size; i++) {
 344                 for (p = hashtab->byaddr[i]; p; p = n) {
 345                         n = p->nextbyaddr;
 346                         if (hashtab->names_duped)
 347                                 free((void *)p->name.name);
 348                         free(p);
 349                 }
 350         }
 351         free(hashtab);
 352 }
 353
 354 static dir_hash_tab_t *
 355 dir_hash_init(
 356         xfs_fsize_t     size)
 357 {
 358         dir_hash_tab_t  *hashtab;
 359         int             hsize;
 360
 361         hsize = size / (16 * 4);
 362         if (hsize > 65536)
 363                 hsize = 63336;
 364         else if (hsize < 16)
 365                 hsize = 16;
 366         if ((hashtab = calloc(DIR_HASH_TAB_SIZE(hsize), 1)) == NULL)
 367                 do_error(_("calloc failed in dir_hash_init\n"));
 368         hashtab->size = hsize;
 369         hashtab->byhash = (dir_hash_ent_t**)((char *)hashtab +
 370                 sizeof(dir_hash_tab_t));
 371         hashtab->byaddr = (dir_hash_ent_t**)((char *)hashtab +
 372                 sizeof(dir_hash_tab_t) + sizeof(dir_hash_ent_t*) * hsize);
 373         return hashtab;
 374 }
 375
 376 static int
 377 dir_hash_see(
 378         dir_hash_tab_t          *hashtab,
 379         xfs_dahash_t            hash,
 380         xfs_dir2_dataptr_t      addr)
 381 {
 382         int                     i;
 383         dir_hash_ent_t          *p;
 384
 385         i = DIR_HASH_FUNC(hashtab, addr);
 386         for (p = hashtab->byaddr[i]; p; p = p->nextbyaddr) {
 387                 if (p->address != addr)
 388                         continue;
 389                 if (p->seen)
 390                         return DIR_HASH_CK_DUPLEAF;
 391                 if (p->junkit == 0 && p->hashval != hash)
 392                         return DIR_HASH_CK_BADHASH;
 393                 p->seen = 1;
 394                 return DIR_HASH_CK_OK;
 395         }
 396         return DIR_HASH_CK_NODATA;
 397 }
 398
 399 static void
 400 dir_hash_update_ftype(
 401         dir_hash_tab_t          *hashtab,
 402         xfs_dir2_dataptr_t      addr,
 403         uint8_t                 ftype)
 404 {
 405         int                     i;
 406         dir_hash_ent_t          *p;
 407
 408         i = DIR_HASH_FUNC(hashtab, addr);
 409         for (p = hashtab->byaddr[i]; p; p = p->nextbyaddr) {
 410                 if (p->address != addr)
 411                         continue;
 412                 p->name.type = ftype;
 413         }
 414 }
 415
 416 /*
 417  * checks to make sure leafs match a data entry, and that the stale
 418  * count is valid.
 419  */
 420 static int
 421 dir_hash_see_all(
 422         dir_hash_tab_t          *hashtab,
 423         xfs_dir2_leaf_entry_t   *ents,
 424         int                     count,
 425         int                     stale)
 426 {
 427         int                     i;
 428         int                     j;
 429         int                     rval;
 430
 431         for (i = j = 0; i < count; i++) {
 432                 if (be32_to_cpu(ents[i].address) == XFS_DIR2_NULL_DATAPTR) {
 433                         j++;
 434                         continue;
 435                 }
 436                 rval = dir_hash_see(hashtab, be32_to_cpu(ents[i].hashval),
 437                                         be32_to_cpu(ents[i].address));
 438                 if (rval != DIR_HASH_CK_OK)
 439                         return rval;
 440         }
 441         return j == stale ? DIR_HASH_CK_OK : DIR_HASH_CK_BADSTALE;
 442 }
 443
 444 /*
 445  * Convert name pointers into locally allocated memory.
 446  * This must only be done after all the entries have been added.
 447  */
 448 static void
 449 dir_hash_dup_names(dir_hash_tab_t *hashtab)
 450 {
 451         unsigned char           *name;
 452         dir_hash_ent_t          *p;
 453
 454         if (hashtab->names_duped)
 455                 return;
 456
 457         for (p = hashtab->first; p; p = p->nextbyorder) {
 458                 name = malloc(p->name.len);
 459                 memcpy(name, p->name.name, p->name.len);
 460                 p->name.name = name;
 461         }
 462         hashtab->names_duped = 1;
 463 }
 464
 465 /*
 466  * Given a block number in a fork, return the next valid block number
 467  * (not a hole).
 468  * If this is the last block number then NULLFILEOFF is returned.
 469  *
 470  * This was originally in the kernel, but only used in xfs_repair.
 471  */
 472 static int
 473 bmap_next_offset(
 474         xfs_trans_t     *tp,                    /* transaction pointer */
 475         xfs_inode_t     *ip,                    /* incore inode */
 476         xfs_fileoff_t   *bnop,                  /* current block */
 477         int             whichfork)              /* data or attr fork */
 478 {
 479         xfs_fileoff_t   bno;                    /* current block */
 480         int             error;                  /* error return value */
 481         xfs_bmbt_irec_t got;                    /* current extent value */
 482         xfs_ifork_t     *ifp;                   /* inode fork pointer */
 483         struct xfs_iext_cursor  icur;
 484
 485         if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
 486             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
 487             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
 488                return EIO;
 489         if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
 490                 *bnop = NULLFILEOFF;
 491                 return 0;
 492         }
 493         ifp = XFS_IFORK_PTR(ip, whichfork);
 494         if (!(ifp->if_flags & XFS_IFEXTENTS) &&
 495             (error = -libxfs_iread_extents(tp, ip, whichfork)))
 496                 return error;
 497         bno = *bnop + 1;
 498         if (!libxfs_iext_lookup_extent(ip, ifp, bno, &icur, &got))
 499                 *bnop = NULLFILEOFF;
 500         else
 501                 *bnop = got.br_startoff < bno ? bno : got.br_startoff;
 502         return 0;
 503 }
 504
 505
 506 static void
 507 res_failed(
 508         int     err)
 509 {
 510         if (err == ENOSPC) {
 511                 do_error(_("ran out of disk space!\n"));
 512         } else
 513                 do_error(_("xfs_trans_reserve returned %d\n"), err);
 514 }
 515
 516 void
 517 mk_rbmino(xfs_mount_t *mp)
 518 {
 519         xfs_trans_t     *tp;
 520         xfs_inode_t     *ip;
 521         xfs_bmbt_irec_t *ep;
 522         xfs_fsblock_t   first;
 523         int             i;
 524         int             nmap;
 525         int             error;
 526         struct xfs_defer_ops    dfops;
 527         xfs_fileoff_t   bno;
 528         xfs_bmbt_irec_t map[XFS_BMAP_MAX_NMAP];
 529         int             vers;
 530         int             times;
 531         uint            blocks;
 532
 533         /*
 534          * first set up inode
 535          */
 536         i = -libxfs_trans_alloc_rollable(mp, 10, &tp);
 537         if (i)
 538                 res_failed(i);
 539
 540         error = -libxfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, 0, &ip);
 541         if (error) {
 542                 do_error(
 543                 _("couldn't iget realtime bitmap inode -- error - %d\n"),
 544                         error);
 545         }
 546
 547         vers = xfs_sb_version_hascrc(&mp->m_sb) ? 3 : 2;
 548         memset(&ip->i_d, 0, sizeof(ip->i_d));
 549
 550         VFS_I(ip)->i_mode = S_IFREG;
 551         ip->i_d.di_version = vers;
 552         ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
 553         ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
 554
 555         set_nlink(VFS_I(ip), 1);        /* account for sb ptr */
 556
 557         times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
 558         if (ip->i_d.di_version == 3) {
 559                 VFS_I(ip)->i_version = 1;
 560                 ip->i_d.di_flags2 = 0;
 561                 times |= XFS_ICHGTIME_CREATE;
 562         }
 563         libxfs_trans_ichgtime(tp, ip, times);
 564
 565         /*
 566          * now the ifork
 567          */
 568         ip->i_df.if_flags = XFS_IFEXTENTS;
 569         ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
 570         ip->i_df.if_u1.if_root = NULL;
 571
 572         ip->i_d.di_size = mp->m_sb.sb_rbmblocks * mp->m_sb.sb_blocksize;
 573
 574         /*
 575          * commit changes
 576          */
 577         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 578         libxfs_trans_commit(tp);
 579
 580         /*
 581          * then allocate blocks for file and fill with zeroes (stolen
 582          * from mkfs)
 583          */
 584         blocks = mp->m_sb.sb_rbmblocks +
 585                         XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1;
 586         error = -libxfs_trans_alloc_rollable(mp, blocks, &tp);
 587         if (error)
 588                 res_failed(error);
 589
 590         libxfs_trans_ijoin(tp, ip, 0);
 591         bno = 0;
 592         libxfs_defer_init(&dfops, &first);
 593         while (bno < mp->m_sb.sb_rbmblocks) {
 594                 nmap = XFS_BMAP_MAX_NMAP;
 595                 error = -libxfs_bmapi_write(tp, ip, bno,
 596                           (xfs_extlen_t)(mp->m_sb.sb_rbmblocks - bno),
 597                           0, &first, mp->m_sb.sb_rbmblocks,
 598                           map, &nmap, &dfops);
 599                 if (error) {
 600                         do_error(
 601                         _("couldn't allocate realtime bitmap, error = %d\n"),
 602                                 error);
 603                 }
 604                 for (i = 0, ep = map; i < nmap; i++, ep++) {
 605                         libxfs_device_zero(mp->m_ddev_targp,
 606                                 XFS_FSB_TO_DADDR(mp, ep->br_startblock),
 607                                 XFS_FSB_TO_BB(mp, ep->br_blockcount));
 608                         bno += ep->br_blockcount;
 609                 }
 610         }
 611         libxfs_defer_ijoin(&dfops, ip);
 612         error = -libxfs_defer_finish(&tp, &dfops);
 613         if (error) {
 614                 do_error(
 615                 _("allocation of the realtime bitmap failed, error = %d\n"),
 616                         error);
 617         }
 618         libxfs_trans_commit(tp);
 619         IRELE(ip);
 620 }
 621
 622 static int
 623 fill_rbmino(xfs_mount_t *mp)
 624 {
 625         xfs_buf_t       *bp;
 626         xfs_trans_t     *tp;
 627         xfs_inode_t     *ip;
 628         xfs_rtword_t    *bmp;
 629         xfs_fsblock_t   first;
 630         int             nmap;
 631         int             error;
 632         xfs_fileoff_t   bno;
 633         xfs_bmbt_irec_t map;
 634
 635         bmp = btmcompute;
 636         bno = 0;
 637
 638         error = -libxfs_trans_alloc_rollable(mp, 10, &tp);
 639         if (error)
 640                 res_failed(error);
 641
 642         error = -libxfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, 0, &ip);
 643         if (error) {
 644                 do_error(
 645                 _("couldn't iget realtime bitmap inode -- error - %d\n"),
 646                         error);
 647         }
 648
 649         first = NULLFSBLOCK;
 650         while (bno < mp->m_sb.sb_rbmblocks)  {
 651                 /*
 652                  * fill the file one block at a time
 653                  */
 654                 nmap = 1;
 655                 error = -libxfs_bmapi_write(tp, ip, bno, 1, 0,
 656                                         &first, 1, &map, &nmap, NULL);
 657                 if (error || nmap != 1) {
 658                         do_error(
 659         _("couldn't map realtime bitmap block %" PRIu64 ", error = %d\n"),
 660                                 bno, error);
 661                 }
 662
 663                 ASSERT(map.br_startblock != HOLESTARTBLOCK);
 664
 665                 error = -libxfs_trans_read_buf(
 666                                 mp, tp, mp->m_dev,
 667                                 XFS_FSB_TO_DADDR(mp, map.br_startblock),
 668                                 XFS_FSB_TO_BB(mp, 1), 1, &bp, NULL);
 669
 670                 if (error) {
 671                         do_warn(
 672 _("can't access block %" PRIu64 " (fsbno %" PRIu64 ") of realtime bitmap inode %" PRIu64 "\n"),
 673                                 bno, map.br_startblock, mp->m_sb.sb_rbmino);
 674                         return(1);
 675                 }
 676
 677                 memmove(bp->b_addr, bmp, mp->m_sb.sb_blocksize);
 678
 679                 libxfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
 680
 681                 bmp = (xfs_rtword_t *)((intptr_t) bmp + mp->m_sb.sb_blocksize);
 682                 bno++;
 683         }
 684
 685         libxfs_trans_commit(tp);
 686         IRELE(ip);
 687         return(0);
 688 }
 689
 690 static int
 691 fill_rsumino(xfs_mount_t *mp)
 692 {
 693         xfs_buf_t       *bp;
 694         xfs_trans_t     *tp;
 695         xfs_inode_t     *ip;
 696         xfs_suminfo_t   *smp;
 697         xfs_fsblock_t   first;
 698         int             nmap;
 699         int             error;
 700         xfs_fileoff_t   bno;
 701         xfs_fileoff_t   end_bno;
 702         xfs_bmbt_irec_t map;
 703
 704         smp = sumcompute;
 705         bno = 0;
 706         end_bno = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
 707
 708         error = -libxfs_trans_alloc_rollable(mp, 10, &tp);
 709         if (error)
 710                 res_failed(error);
 711
 712         error = -libxfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, 0, &ip);
 713         if (error) {
 714                 do_error(
 715                 _("couldn't iget realtime summary inode -- error - %d\n"),
 716                         error);
 717         }
 718
 719         first = NULLFSBLOCK;
 720         while (bno < end_bno)  {
 721                 /*
 722                  * fill the file one block at a time
 723                  */
 724                 nmap = 1;
 725                 error = -libxfs_bmapi_write(tp, ip, bno, 1, 0,
 726                                         &first, 1, &map, &nmap, NULL);
 727                 if (error || nmap != 1) {
 728                         do_error(
 729         _("couldn't map realtime summary inode block %" PRIu64 ", error = %d\n"),
 730                                 bno, error);
 731                 }
 732
 733                 ASSERT(map.br_startblock != HOLESTARTBLOCK);
 734
 735                 error = -libxfs_trans_read_buf(
 736                                 mp, tp, mp->m_dev,
 737                                 XFS_FSB_TO_DADDR(mp, map.br_startblock),
 738                                 XFS_FSB_TO_BB(mp, 1), 1, &bp, NULL);
 739
 740                 if (error) {
 741                         do_warn(
 742 _("can't access block %" PRIu64 " (fsbno %" PRIu64 ") of realtime summary inode %" PRIu64 "\n"),
 743                                 bno, map.br_startblock, mp->m_sb.sb_rsumino);
 744                         IRELE(ip);
 745                         return(1);
 746                 }
 747
 748                 memmove(bp->b_addr, smp, mp->m_sb.sb_blocksize);
 749
 750                 libxfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
 751
 752                 smp = (xfs_suminfo_t *)((intptr_t)smp + mp->m_sb.sb_blocksize);
 753                 bno++;
 754         }
 755
 756         libxfs_trans_commit(tp);
 757         IRELE(ip);
 758         return(0);
 759 }
 760
 761 static void
 762 mk_rsumino(xfs_mount_t *mp)
 763 {
 764         xfs_trans_t     *tp;
 765         xfs_inode_t     *ip;
 766         xfs_bmbt_irec_t *ep;
 767         xfs_fsblock_t   first;
 768         int             i;
 769         int             nmap;
 770         int             error;
 771         int             nsumblocks;
 772         struct xfs_defer_ops    dfops;
 773         xfs_fileoff_t   bno;
 774         xfs_bmbt_irec_t map[XFS_BMAP_MAX_NMAP];
 775         int             vers;
 776         int             times;
 777         uint            blocks;
 778
 779         /*
 780          * first set up inode
 781          */
 782         i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 10, 0, 0, &tp);
 783         if (i)
 784                 res_failed(i);
 785
 786         error = -libxfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, 0, &ip);
 787         if (error) {
 788                 do_error(
 789                 _("couldn't iget realtime summary inode -- error - %d\n"),
 790                         error);
 791         }
 792
 793         vers = xfs_sb_version_hascrc(&mp->m_sb) ? 3 : 2;
 794         memset(&ip->i_d, 0, sizeof(ip->i_d));
 795
 796         VFS_I(ip)->i_mode = S_IFREG;
 797         ip->i_d.di_version = vers;
 798         ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
 799         ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
 800
 801         set_nlink(VFS_I(ip), 1);        /* account for sb ptr */
 802
 803         times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
 804         if (ip->i_d.di_version == 3) {
 805                 VFS_I(ip)->i_version = 1;
 806                 ip->i_d.di_flags2 = 0;
 807                 times |= XFS_ICHGTIME_CREATE;
 808         }
 809         libxfs_trans_ichgtime(tp, ip, times);
 810
 811         /*
 812          * now the ifork
 813          */
 814         ip->i_df.if_flags = XFS_IFEXTENTS;
 815         ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
 816         ip->i_df.if_u1.if_root = NULL;
 817
 818         ip->i_d.di_size = mp->m_rsumsize;
 819
 820         /*
 821          * commit changes
 822          */
 823         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 824         libxfs_trans_commit(tp);
 825
 826         /*
 827          * then allocate blocks for file and fill with zeroes (stolen
 828          * from mkfs)
 829          */
 830         libxfs_defer_init(&dfops, &first);
 831
 832         nsumblocks = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
 833         blocks = nsumblocks + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1;
 834         error = -libxfs_trans_alloc_rollable(mp, blocks, &tp);
 835         if (error)
 836                 res_failed(error);
 837
 838         libxfs_trans_ijoin(tp, ip, 0);
 839         bno = 0;
 840         libxfs_defer_init(&dfops, &first);
 841         while (bno < nsumblocks) {
 842                 nmap = XFS_BMAP_MAX_NMAP;
 843                 error = -libxfs_bmapi_write(tp, ip, bno,
 844                           (xfs_extlen_t)(nsumblocks - bno),
 845                           0, &first, nsumblocks, map, &nmap, &dfops);
 846                 if (error) {
 847                         do_error(
 848                 _("couldn't allocate realtime summary inode, error = %d\n"),
 849                                 error);
 850                 }
 851                 for (i = 0, ep = map; i < nmap; i++, ep++) {
 852                         libxfs_device_zero(mp->m_ddev_targp,
 853                                       XFS_FSB_TO_DADDR(mp, ep->br_startblock),
 854                                       XFS_FSB_TO_BB(mp, ep->br_blockcount));
 855                         bno += ep->br_blockcount;
 856                 }
 857         }
 858         libxfs_defer_ijoin(&dfops, ip);
 859         error = -libxfs_defer_finish(&tp, &dfops);
 860         if (error) {
 861                 do_error(
 862         _("allocation of the realtime summary ino failed, error = %d\n"),
 863                         error);
 864         }
 865         libxfs_trans_commit(tp);
 866         IRELE(ip);
 867 }
 868
 869 /*
 870  * makes a new root directory.
 871  */
 872 static void
 873 mk_root_dir(xfs_mount_t *mp)
 874 {
 875         xfs_trans_t     *tp;
 876         xfs_inode_t     *ip;
 877         int             i;
 878         int             error;
 879         const mode_t    mode = 0755;
 880         ino_tree_node_t *irec;
 881         int             vers;
 882         int             times;
 883
 884         ip = NULL;
 885         i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 10, 0, 0, &tp);
 886         if (i)
 887                 res_failed(i);
 888
 889         error = -libxfs_trans_iget(mp, tp, mp->m_sb.sb_rootino, 0, 0, &ip);
 890         if (error) {
 891                 do_error(_("could not iget root inode -- error - %d\n"), error);
 892         }
 893
 894         /*
 895          * take care of the core -- initialization from xfs_ialloc()
 896          */
 897         vers = xfs_sb_version_hascrc(&mp->m_sb) ? 3 : 2;
 898         memset(&ip->i_d, 0, sizeof(ip->i_d));
 899
 900         VFS_I(ip)->i_mode = mode|S_IFDIR;
 901         ip->i_d.di_version = vers;
 902         ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
 903         ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
 904
 905         set_nlink(VFS_I(ip), 1);        /* account for . */
 906
 907         times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
 908         if (ip->i_d.di_version == 3) {
 909                 VFS_I(ip)->i_version = 1;
 910                 ip->i_d.di_flags2 = 0;
 911                 times |= XFS_ICHGTIME_CREATE;
 912         }
 913         libxfs_trans_ichgtime(tp, ip, times);
 914
 915         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 916
 917         /*
 918          * now the ifork
 919          */
 920         ip->i_df.if_flags = XFS_IFEXTENTS;
 921         ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
 922         ip->i_df.if_u1.if_root = NULL;
 923
 924
 925
 926         /*
 927          * initialize the directory
 928          */
 929         ip->d_ops = mp->m_dir_inode_ops;
 930         libxfs_dir_init(tp, ip, ip);
 931
 932         libxfs_trans_commit(tp);
 933         IRELE(ip);
 934
 935         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
 936                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
 937         set_inode_isadir(irec, XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino) -
 938                                 irec->ino_startnum);
 939 }
 940
 941 /*
 942  * orphanage name == lost+found
 943  */
 944 static xfs_ino_t
 945 mk_orphanage(xfs_mount_t *mp)
 946 {
 947         xfs_ino_t       ino;
 948         xfs_trans_t     *tp;
 949         xfs_inode_t     *ip;
 950         xfs_inode_t     *pip;
 951         xfs_fsblock_t   first;
 952         ino_tree_node_t *irec;
 953         int             ino_offset = 0;
 954         int             i;
 955         int             error;
 956         struct xfs_defer_ops    dfops;
 957         const int       mode = 0755;
 958         int             nres;
 959         struct xfs_name xname;
 960
 961         /*
 962          * check for an existing lost+found first, if it exists, return
 963          * its inode. Otherwise, we can create it. Bad lost+found inodes
 964          * would have been cleared in phase3 and phase4.
 965          */
 966
 967         i = -libxfs_iget(mp, NULL, mp->m_sb.sb_rootino, 0, &pip,
 968                         &xfs_default_ifork_ops);
 969         if (i)
 970                 do_error(_("%d - couldn't iget root inode to obtain %s\n"),
 971                         i, ORPHANAGE);
 972
 973         xname.name = (unsigned char *)ORPHANAGE;
 974         xname.len = strlen(ORPHANAGE);
 975         xname.type = XFS_DIR3_FT_DIR;
 976
 977         if (libxfs_dir_lookup(NULL, pip, &xname, &ino, NULL) == 0)
 978                 return ino;
 979
 980         /*
 981          * could not be found, create it
 982          */
 983         libxfs_defer_init(&dfops, &first);
 984         nres = XFS_MKDIR_SPACE_RES(mp, xname.len);
 985         i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir, nres, 0, 0, &tp);
 986         if (i)
 987                 res_failed(i);
 988
 989         /*
 990          * use iget/ijoin instead of trans_iget because the ialloc
 991          * wrapper can commit the transaction and start a new one
 992          */
 993 /*      i = -libxfs_iget(mp, NULL, mp->m_sb.sb_rootino, 0, &pip,
 994                         &xfs_default_ifork_ops);
 995         if (i)
 996                 do_error(_("%d - couldn't iget root inode to make %s\n"),
 997                         i, ORPHANAGE);*/
 998
 999         error = -libxfs_inode_alloc(&tp, pip, mode|S_IFDIR,
1000                                         1, 0, &zerocr, &zerofsx, &ip);
1001         if (error) {
1002                 do_error(_("%s inode allocation failed %d\n"),
1003                         ORPHANAGE, error);
1004         }
1005         inc_nlink(VFS_I(ip));           /* account for . */
1006         ino = ip->i_ino;
1007
1008         irec = find_inode_rec(mp,
1009                         XFS_INO_TO_AGNO(mp, ino),
1010                         XFS_INO_TO_AGINO(mp, ino));
1011
1012         if (irec == NULL) {
1013                 /*
1014                  * This inode is allocated from a newly created inode
1015                  * chunk and therefore did not exist when inode chunks
1016                  * were processed in phase3. Add this group of inodes to
1017                  * the entry avl tree as if they were discovered in phase3.
1018                  */
1019                 irec = set_inode_free_alloc(mp, XFS_INO_TO_AGNO(mp, ino),
1020                                             XFS_INO_TO_AGINO(mp, ino));
1021                 alloc_ex_data(irec);
1022
1023                 for (i = 0; i < XFS_INODES_PER_CHUNK; i++)
1024                         set_inode_free(irec, i);
1025         }
1026
1027         ino_offset = get_inode_offset(mp, ino, irec);
1028
1029         /*
1030          * Mark the inode allocated to lost+found as used in the AVL tree
1031          * so it is not skipped in phase 7
1032          */
1033         set_inode_used(irec, ino_offset);
1034         add_inode_ref(irec, ino_offset);
1035
1036         /*
1037          * now that we know the transaction will stay around,
1038          * add the root inode to it
1039          */
1040         libxfs_trans_ijoin(tp, pip, 0);
1041
1042         /*
1043          * create the actual entry
1044          */
1045         error = -libxfs_dir_createname(tp, pip, &xname, ip->i_ino, &first,
1046                                         nres);
1047         if (error)
1048                 do_error(
1049                 _("can't make %s, createname error %d\n"),
1050                         ORPHANAGE, error);
1051
1052         /*
1053          * bump up the link count in the root directory to account
1054          * for .. in the new directory
1055          */
1056         inc_nlink(VFS_I(pip));
1057         add_inode_ref(find_inode_rec(mp,
1058                                 XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
1059                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino)), 0);
1060
1061
1062
1063         libxfs_trans_log_inode(tp, pip, XFS_ILOG_CORE);
1064         libxfs_dir_init(tp, ip, pip);
1065         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1066
1067         libxfs_defer_ijoin(&dfops, ip);
1068         error = -libxfs_defer_finish(&tp, &dfops);
1069         if (error) {
1070                 do_error(_("%s directory creation failed -- bmapf error %d\n"),
1071                         ORPHANAGE, error);
1072         }
1073
1074
1075         libxfs_trans_commit(tp);
1076         IRELE(ip);
1077         IRELE(pip);
1078         add_inode_reached(irec,ino_offset);
1079
1080         return(ino);
1081 }
1082
1083 /*
1084  * move a file to the orphange.
1085  */
1086 static void
1087 mv_orphanage(
1088         xfs_mount_t             *mp,
1089         xfs_ino_t               ino,            /* inode # to be moved */
1090         int                     isa_dir)        /* 1 if inode is a directory */
1091 {
1092         xfs_inode_t             *orphanage_ip;
1093         xfs_ino_t               entry_ino_num;
1094         xfs_inode_t             *ino_p;
1095         xfs_trans_t             *tp;
1096         xfs_fsblock_t           first;
1097         struct xfs_defer_ops            dfops;
1098         int                     err;
1099         unsigned char           fname[MAXPATHLEN + 1];
1100         int                     nres;
1101         int                     incr;
1102         ino_tree_node_t         *irec;
1103         int                     ino_offset = 0;
1104         struct xfs_name         xname;
1105
1106         xname.name = fname;
1107         xname.len = snprintf((char *)fname, sizeof(fname), "%llu",
1108                                 (unsigned long long)ino);
1109
1110         err = -libxfs_iget(mp, NULL, orphanage_ino, 0, &orphanage_ip,
1111                         &xfs_default_ifork_ops);
1112         if (err)
1113                 do_error(_("%d - couldn't iget orphanage inode\n"), err);
1114         /*
1115          * Make sure the filename is unique in the lost+found
1116          */
1117         incr = 0;
1118         while (libxfs_dir_lookup(NULL, orphanage_ip, &xname, &entry_ino_num,
1119                                                                 NULL) == 0)
1120                 xname.len = snprintf((char *)fname, sizeof(fname), "%llu.%d",
1121                                         (unsigned long long)ino, ++incr);
1122
1123         /* Orphans may not have a proper parent, so use custom ops here */
1124         err = -libxfs_iget(mp, NULL, ino, 0, &ino_p, &phase6_ifork_ops);
1125         if (err)
1126                 do_error(_("%d - couldn't iget disconnected inode\n"), err);
1127
1128         xname.type = libxfs_mode_to_ftype(VFS_I(ino_p)->i_mode);
1129
1130         if (isa_dir)  {
1131                 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, orphanage_ino),
1132                                 XFS_INO_TO_AGINO(mp, orphanage_ino));
1133                 if (irec)
1134                         ino_offset = XFS_INO_TO_AGINO(mp, orphanage_ino) -
1135                                         irec->ino_startnum;
1136                 nres = XFS_DIRENTER_SPACE_RES(mp, fnamelen) +
1137                        XFS_DIRENTER_SPACE_RES(mp, 2);
1138                 err = -libxfs_dir_lookup(NULL, ino_p, &xfs_name_dotdot,
1139                                         &entry_ino_num, NULL);
1140                 if (err) {
1141                         ASSERT(err == ENOENT);
1142
1143                         err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_rename,
1144                                                   nres, 0, 0, &tp);
1145                         if (err)
1146                                 do_error(
1147         _("space reservation failed (%d), filesystem may be out of space\n"),
1148                                         err);
1149
1150                         libxfs_trans_ijoin(tp, orphanage_ip, 0);
1151                         libxfs_trans_ijoin(tp, ino_p, 0);
1152
1153                         libxfs_defer_init(&dfops, &first);
1154                         err = -libxfs_dir_createname(tp, orphanage_ip, &xname,
1155                                                 ino, &first, nres);
1156                         if (err)
1157                                 do_error(
1158         _("name create failed in %s (%d), filesystem may be out of space\n"),
1159                                         ORPHANAGE, err);
1160
1161                         if (irec)
1162                                 add_inode_ref(irec, ino_offset);
1163                         else
1164                                 inc_nlink(VFS_I(orphanage_ip));
1165                         libxfs_trans_log_inode(tp, orphanage_ip, XFS_ILOG_CORE);
1166
1167                         err = -libxfs_dir_createname(tp, ino_p, &xfs_name_dotdot,
1168                                         orphanage_ino, &first, nres);
1169                         if (err)
1170                                 do_error(
1171         _("creation of .. entry failed (%d), filesystem may be out of space\n"),
1172                                         err);
1173
1174                         inc_nlink(VFS_I(ino_p));
1175                         libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE);
1176
1177                         libxfs_defer_ijoin(&dfops, ino_p);
1178                         err = -libxfs_defer_finish(&tp, &dfops);
1179                         if (err)
1180                                 do_error(
1181         _("bmap finish failed (err - %d), filesystem may be out of space\n"),
1182                                         err);
1183
1184                         libxfs_trans_commit(tp);
1185                 } else  {
1186                         err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_rename,
1187                                                   nres, 0, 0, &tp);
1188                         if (err)
1189                                 do_error(
1190         _("space reservation failed (%d), filesystem may be out of space\n"),
1191                                         err);
1192
1193                         libxfs_trans_ijoin(tp, orphanage_ip, 0);
1194                         libxfs_trans_ijoin(tp, ino_p, 0);
1195
1196                         libxfs_defer_init(&dfops, &first);
1197
1198                         err = -libxfs_dir_createname(tp, orphanage_ip, &xname,
1199                                                 ino, &first, nres);
1200                         if (err)
1201                                 do_error(
1202         _("name create failed in %s (%d), filesystem may be out of space\n"),
1203                                         ORPHANAGE, err);
1204
1205                         if (irec)
1206                                 add_inode_ref(irec, ino_offset);
1207                         else
1208                                 inc_nlink(VFS_I(orphanage_ip));
1209                         libxfs_trans_log_inode(tp, orphanage_ip, XFS_ILOG_CORE);
1210
1211                         /*
1212                          * don't replace .. value if it already points
1213                          * to us.  that'll pop a libxfs/kernel ASSERT.
1214                          */
1215                         if (entry_ino_num != orphanage_ino)  {
1216                                 err = -libxfs_dir_replace(tp, ino_p,
1217                                                 &xfs_name_dotdot, orphanage_ino,
1218                                                 &first, nres);
1219                                 if (err)
1220                                         do_error(
1221         _("name replace op failed (%d), filesystem may be out of space\n"),
1222                                                 err);
1223                         }
1224
1225                         libxfs_defer_ijoin(&dfops, ino_p);
1226                         err = -libxfs_defer_finish(&tp, &dfops);
1227                         if (err)
1228                                 do_error(
1229         _("bmap finish failed (%d), filesystem may be out of space\n"),
1230                                         err);
1231
1232                         libxfs_trans_commit(tp);
1233                 }
1234
1235         } else  {
1236                 /*
1237                  * use the remove log reservation as that's
1238                  * more accurate.  we're only creating the
1239                  * links, we're not doing the inode allocation
1240                  * also accounted for in the create
1241                  */
1242                 nres = XFS_DIRENTER_SPACE_RES(mp, xname.len);
1243                 err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove,
1244                                           nres, 0, 0, &tp);
1245                 if (err)
1246                         do_error(
1247         _("space reservation failed (%d), filesystem may be out of space\n"),
1248                                 err);
1249
1250                 libxfs_trans_ijoin(tp, orphanage_ip, 0);
1251                 libxfs_trans_ijoin(tp, ino_p, 0);
1252
1253                 libxfs_defer_init(&dfops, &first);
1254                 err = -libxfs_dir_createname(tp, orphanage_ip, &xname, ino,
1255                                                 &first, nres);
1256                 if (err)
1257                         do_error(
1258         _("name create failed in %s (%d), filesystem may be out of space\n"),
1259                                 ORPHANAGE, err);
1260                 ASSERT(err == 0);
1261
1262                 set_nlink(VFS_I(ino_p), 1);
1263                 libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE);
1264
1265                 libxfs_defer_ijoin(&dfops, ino_p);
1266                 err = -libxfs_defer_finish(&tp, &dfops);
1267                 if (err)
1268                         do_error(
1269         _("bmap finish failed (%d), filesystem may be out of space\n"),
1270                                 err);
1271
1272                 libxfs_trans_commit(tp);
1273         }
1274         IRELE(ino_p);
1275         IRELE(orphanage_ip);
1276 }
1277
1278 static int
1279 entry_junked(
1280         const char      *msg,
1281         const char      *iname,
1282         xfs_ino_t       ino1,
1283         xfs_ino_t       ino2)
1284 {
1285         do_warn(msg, iname, ino1, ino2);
1286         if (!no_modify) {
1287                 if (verbose)
1288                         do_warn(_(", marking entry to be junked\n"));
1289                 else
1290                         do_warn("\n");
1291         } else
1292                 do_warn(_(", would junk entry\n"));
1293         return !no_modify;
1294 }
1295
1296 /* Find and invalidate all the directory's buffers. */
1297 static int
1298 dir_binval(
1299         struct xfs_trans        *tp,
1300         struct xfs_inode        *ip,
1301         int                     whichfork)
1302 {
1303         struct xfs_iext_cursor  icur;
1304         struct xfs_bmbt_irec    rec;
1305         struct xfs_ifork        *ifp;
1306         struct xfs_da_geometry  *geo;
1307         struct xfs_buf          *bp;
1308         xfs_dablk_t             dabno, end_dabno;
1309         int                     error = 0;
1310
1311         if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
1312             ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
1313                 return 0;
1314
1315         geo = tp->t_mountp->m_dir_geo;
1316         ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1317         for_each_xfs_iext(ifp, &icur, &rec) {
1318                 dabno = xfs_dir2_db_to_da(geo, rec.br_startoff +
1319                                 geo->fsbcount - 1);
1320                 end_dabno = xfs_dir2_db_to_da(geo, rec.br_startoff +
1321                                 rec.br_blockcount);
1322                 for (; dabno <= end_dabno; dabno += geo->fsbcount) {
1323                         bp = NULL;
1324                         error = -libxfs_da_get_buf(tp, ip, dabno, -2, &bp,
1325                                         whichfork);
1326                         if (error)
1327                                 return error;
1328                         if (!bp)
1329                                 continue;
1330                         libxfs_trans_binval(tp, bp);
1331                         libxfs_trans_brelse(tp, bp);
1332                 }
1333         }
1334
1335         return error;
1336 }
1337
1338 /*
1339  * Unexpected failure during the rebuild will leave the entries in
1340  * lost+found on the next run
1341  */
1342
1343 static void
1344 longform_dir2_rebuild(
1345         xfs_mount_t             *mp,
1346         xfs_ino_t               ino,
1347         xfs_inode_t             *ip,
1348         ino_tree_node_t         *irec,
1349         int                     ino_offset,
1350         dir_hash_tab_t          *hashtab)
1351 {
1352         int                     error;
1353         int                     nres;
1354         xfs_trans_t             *tp;
1355         xfs_fileoff_t           lastblock;
1356         xfs_fsblock_t           firstblock;
1357         struct xfs_defer_ops            dfops;
1358         xfs_inode_t             pip;
1359         dir_hash_ent_t          *p;
1360         int                     done;
1361
1362         /*
1363          * trash directory completely and rebuild from scratch using the
1364          * name/inode pairs in the hash table
1365          */
1366
1367         do_warn(_("rebuilding directory inode %" PRIu64 "\n"), ino);
1368
1369         /*
1370          * first attempt to locate the parent inode, if it can't be
1371          * found, set it to the root inode and it'll be moved to the
1372          * orphanage later (the inode number here needs to be valid
1373          * for the libxfs_dir_init() call).
1374          */
1375         pip.i_ino = get_inode_parent(irec, ino_offset);
1376         if (pip.i_ino == NULLFSINO ||
1377             libxfs_dir_ino_validate(mp, pip.i_ino))
1378                 pip.i_ino = mp->m_sb.sb_rootino;
1379
1380         libxfs_defer_init(&dfops, &firstblock);
1381
1382         nres = XFS_REMOVE_SPACE_RES(mp);
1383         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, nres, 0, 0, &tp);
1384         if (error)
1385                 res_failed(error);
1386         libxfs_trans_ijoin(tp, ip, 0);
1387
1388         error = dir_binval(tp, ip, XFS_DATA_FORK);
1389         if (error)
1390                 res_failed(error);
1391
1392         if ((error = -libxfs_bmap_last_offset(ip, &lastblock, XFS_DATA_FORK)))
1393                 do_error(_("xfs_bmap_last_offset failed -- error - %d\n"),
1394                         error);
1395
1396         /* free all data, leaf, node and freespace blocks */
1397         error = -libxfs_bunmapi(tp, ip, 0, lastblock, XFS_BMAPI_METADATA, 0,
1398                                 &firstblock, &dfops, &done);
1399         if (error) {
1400                 do_warn(_("xfs_bunmapi failed -- error - %d\n"), error);
1401                 goto out_bmap_cancel;
1402         }
1403
1404         ASSERT(done);
1405
1406         error = -libxfs_dir_init(tp, ip, &pip);
1407         if (error) {
1408                 do_warn(_("xfs_dir_init failed -- error - %d\n"), error);
1409                 goto out_bmap_cancel;
1410         }
1411
1412         libxfs_defer_ijoin(&dfops, ip);
1413         error = -libxfs_defer_finish(&tp, &dfops);
1414
1415         libxfs_trans_commit(tp);
1416
1417         if (ino == mp->m_sb.sb_rootino)
1418                 need_root_dotdot = 0;
1419
1420         /* go through the hash list and re-add the inodes */
1421
1422         for (p = hashtab->first; p; p = p->nextbyorder) {
1423
1424                 if (p->name.name[0] == '/' || (p->name.name[0] == '.' &&
1425                                 (p->name.len == 1 || (p->name.len == 2 &&
1426                                                 p->name.name[1] == '.'))))
1427                         continue;
1428
1429                 nres = XFS_CREATE_SPACE_RES(mp, p->name.len);
1430                 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_create,
1431                                             nres, 0, 0, &tp);
1432                 if (error)
1433                         res_failed(error);
1434
1435                 libxfs_trans_ijoin(tp, ip, 0);
1436
1437                 libxfs_defer_init(&dfops, &firstblock);
1438                 error = -libxfs_dir_createname(tp, ip, &p->name, p->inum,
1439                                                 &firstblock, nres);
1440                 if (error) {
1441                         do_warn(
1442 _("name create failed in ino %" PRIu64 " (%d), filesystem may be out of space\n"),
1443                                 ino, error);
1444                         goto out_bmap_cancel;
1445                 }
1446
1447                 libxfs_defer_ijoin(&dfops, ip);
1448                 error = -libxfs_defer_finish(&tp, &dfops);
1449                 if (error) {
1450                         do_warn(
1451         _("bmap finish failed (%d), filesystem may be out of space\n"),
1452                                 error);
1453                         goto out_bmap_cancel;
1454                 }
1455
1456                 libxfs_trans_commit(tp);
1457         }
1458
1459         return;
1460
1461 out_bmap_cancel:
1462         libxfs_defer_cancel(&dfops);
1463         libxfs_trans_cancel(tp);
1464         return;
1465 }
1466
1467
1468 /*
1469  * Kill a block in a version 2 inode.
1470  * Makes its own transaction.
1471  */
1472 static void
1473 dir2_kill_block(
1474         xfs_mount_t     *mp,
1475         xfs_inode_t     *ip,
1476         xfs_dablk_t     da_bno,
1477         struct xfs_buf  *bp)
1478 {
1479         xfs_da_args_t   args;
1480         int             error;
1481         xfs_fsblock_t   firstblock;
1482         struct xfs_defer_ops    dfops;
1483         int             nres;
1484         xfs_trans_t     *tp;
1485
1486         nres = XFS_REMOVE_SPACE_RES(mp);
1487         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, nres, 0, 0, &tp);
1488         if (error)
1489                 res_failed(error);
1490         libxfs_trans_ijoin(tp, ip, 0);
1491         libxfs_trans_bjoin(tp, bp);
1492         memset(&args, 0, sizeof(args));
1493         libxfs_defer_init(&dfops, &firstblock);
1494         args.dp = ip;
1495         args.trans = tp;
1496         args.firstblock = &firstblock;
1497         args.whichfork = XFS_DATA_FORK;
1498         args.geo = mp->m_dir_geo;
1499         if (da_bno >= mp->m_dir_geo->leafblk && da_bno < mp->m_dir_geo->freeblk)
1500                 error = -libxfs_da_shrink_inode(&args, da_bno, bp);
1501         else
1502                 error = -libxfs_dir2_shrink_inode(&args,
1503                                 xfs_dir2_da_to_db(mp->m_dir_geo, da_bno), bp);
1504         if (error)
1505                 do_error(_("shrink_inode failed inode %" PRIu64 " block %u\n"),
1506                         ip->i_ino, da_bno);
1507         libxfs_defer_ijoin(&dfops, ip);
1508         libxfs_defer_finish(&tp, &dfops);
1509         libxfs_trans_commit(tp);
1510 }
1511
1512 /*
1513  * process a data block, also checks for .. entry
1514  * and corrects it to match what we think .. should be
1515  */
1516 static void
1517 longform_dir2_entry_check_data(
1518         xfs_mount_t             *mp,
1519         xfs_inode_t             *ip,
1520         int                     *num_illegal,
1521         int                     *need_dot,
1522         ino_tree_node_t         *current_irec,
1523         int                     current_ino_offset,
1524         struct xfs_buf          **bpp,
1525         dir_hash_tab_t          *hashtab,
1526         freetab_t               **freetabp,
1527         xfs_dablk_t             da_bno,
1528         int                     isblock)
1529 {
1530         xfs_dir2_dataptr_t      addr;
1531         xfs_dir2_leaf_entry_t   *blp;
1532         struct xfs_buf          *bp;
1533         xfs_dir2_block_tail_t   *btp;
1534         struct xfs_dir2_data_hdr *d;
1535         xfs_dir2_db_t           db;
1536         xfs_dir2_data_entry_t   *dep;
1537         xfs_dir2_data_unused_t  *dup;
1538         struct xfs_dir2_data_free *bf;
1539         char                    *endptr;
1540         int                     error;
1541         xfs_fsblock_t           firstblock;
1542         struct xfs_defer_ops            dfops;
1543         char                    fname[MAXNAMELEN + 1];
1544         freetab_t               *freetab;
1545         int                     i;
1546         int                     ino_offset;
1547         xfs_ino_t               inum;
1548         ino_tree_node_t         *irec;
1549         int                     junkit;
1550         int                     lastfree;
1551         int                     len;
1552         int                     nbad;
1553         int                     needlog;
1554         int                     needscan;
1555         xfs_ino_t               parent;
1556         char                    *ptr;
1557         xfs_trans_t             *tp;
1558         int                     wantmagic;
1559         struct xfs_da_args      da = {
1560                 .dp = ip,
1561                 .geo = mp->m_dir_geo,
1562         };
1563
1564
1565         bp = *bpp;
1566         d = bp->b_addr;
1567         ptr = (char *)M_DIROPS(mp)->data_entry_p(d);
1568         nbad = 0;
1569         needscan = needlog = 0;
1570         junkit = 0;
1571         freetab = *freetabp;
1572         if (isblock) {
1573                 btp = xfs_dir2_block_tail_p(mp->m_dir_geo, d);
1574                 blp = xfs_dir2_block_leaf_p(btp);
1575                 endptr = (char *)blp;
1576                 if (endptr > (char *)btp)
1577                         endptr = (char *)btp;
1578                 if (xfs_sb_version_hascrc(&mp->m_sb))
1579                         wantmagic = XFS_DIR3_BLOCK_MAGIC;
1580                 else
1581                         wantmagic = XFS_DIR2_BLOCK_MAGIC;
1582         } else {
1583                 endptr = (char *)d + mp->m_dir_geo->blksize;
1584                 if (xfs_sb_version_hascrc(&mp->m_sb))
1585                         wantmagic = XFS_DIR3_DATA_MAGIC;
1586                 else
1587                         wantmagic = XFS_DIR2_DATA_MAGIC;
1588         }
1589         db = xfs_dir2_da_to_db(mp->m_dir_geo, da_bno);
1590
1591         /* check for data block beyond expected end */
1592         if (freetab->naents <= db) {
1593                 struct freetab_ent e;
1594
1595                 *freetabp = freetab = realloc(freetab, FREETAB_SIZE(db + 1));
1596                 if (!freetab) {
1597                         do_error(_("realloc failed in %s (%zu bytes)\n"),
1598                                 __func__, FREETAB_SIZE(db + 1));
1599                 }
1600                 e.v = NULLDATAOFF;
1601                 e.s = 0;
1602                 for (i = freetab->naents; i < db; i++)
1603                         freetab->ents[i] = e;
1604                 freetab->naents = db + 1;
1605         }
1606
1607         /* check the data block */
1608         while (ptr < endptr) {
1609
1610                 /* check for freespace */
1611                 dup = (xfs_dir2_data_unused_t *)ptr;
1612                 if (XFS_DIR2_DATA_FREE_TAG == be16_to_cpu(dup->freetag)) {
1613
1614                         /* check for invalid freespace length */
1615                         if (ptr + be16_to_cpu(dup->length) > endptr ||
1616                                         be16_to_cpu(dup->length) == 0 ||
1617                                         (be16_to_cpu(dup->length) &
1618                                                 (XFS_DIR2_DATA_ALIGN - 1)))
1619                                 break;
1620
1621                         /* check for invalid tag */
1622                         if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) !=
1623                                                 (char *)dup - (char *)d)
1624                                 break;
1625
1626                         /* check for block with no data entries */
1627                         if ((ptr == (char *)M_DIROPS(mp)->data_entry_p(d)) &&
1628                             (ptr + be16_to_cpu(dup->length) >= endptr)) {
1629                                 junkit = 1;
1630                                 *num_illegal += 1;
1631                                 break;
1632                         }
1633
1634                         /* continue at the end of the freespace */
1635                         ptr += be16_to_cpu(dup->length);
1636                         if (ptr >= endptr)
1637                                 break;
1638                 }
1639
1640                 /* validate data entry size */
1641                 dep = (xfs_dir2_data_entry_t *)ptr;
1642                 if (ptr + M_DIROPS(mp)->data_entsize(dep->namelen) > endptr)
1643                         break;
1644                 if (be16_to_cpu(*M_DIROPS(mp)->data_entry_tag_p(dep)) !=
1645                                                 (char *)dep - (char *)d)
1646                         break;
1647                 ptr += M_DIROPS(mp)->data_entsize(dep->namelen);
1648         }
1649
1650         /* did we find an empty or corrupt block? */
1651         if (ptr != endptr) {
1652                 if (junkit) {
1653                         do_warn(
1654         _("empty data block %u in directory inode %" PRIu64 ": "),
1655                                 da_bno, ip->i_ino);
1656                 } else {
1657                         do_warn(_
1658         ("corrupt block %u in directory inode %" PRIu64 ": "),
1659                                 da_bno, ip->i_ino);
1660                 }
1661                 if (!no_modify) {
1662                         do_warn(_("junking block\n"));
1663                         dir2_kill_block(mp, ip, da_bno, bp);
1664                 } else {
1665                         do_warn(_("would junk block\n"));
1666                         libxfs_putbuf(bp);
1667                 }
1668                 freetab->ents[db].v = NULLDATAOFF;
1669                 *bpp = NULL;
1670                 return;
1671         }
1672
1673         /* update number of data blocks processed */
1674         if (freetab->nents < db + 1)
1675                 freetab->nents = db + 1;
1676
1677         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, 0, 0, 0, &tp);
1678         if (error)
1679                 res_failed(error);
1680         da.trans = tp;
1681         libxfs_trans_ijoin(tp, ip, 0);
1682         libxfs_trans_bjoin(tp, bp);
1683         libxfs_trans_bhold(tp, bp);
1684         libxfs_defer_init(&dfops, &firstblock);
1685         if (be32_to_cpu(d->magic) != wantmagic) {
1686                 do_warn(
1687         _("bad directory block magic # %#x for directory inode %" PRIu64 " block %d: "),
1688                         be32_to_cpu(d->magic), ip->i_ino, da_bno);
1689                 if (!no_modify) {
1690                         do_warn(_("fixing magic # to %#x\n"), wantmagic);
1691                         d->magic = cpu_to_be32(wantmagic);
1692                         needlog = 1;
1693                 } else
1694                         do_warn(_("would fix magic # to %#x\n"), wantmagic);
1695         }
1696         lastfree = 0;
1697         ptr = (char *)M_DIROPS(mp)->data_entry_p(d);
1698         /*
1699          * look at each entry.  reference inode pointed to by each
1700          * entry in the incore inode tree.
1701          * if not a directory, set reached flag, increment link count
1702          * if a directory and reached, mark entry as to be deleted.
1703          * if a directory, check to see if recorded parent
1704          *      matches current inode #,
1705          *      if so, then set reached flag, increment link count
1706          *              of current and child dir inodes, push the child
1707          *              directory inode onto the directory stack.
1708          *      if current inode != parent, then mark entry to be deleted.
1709          */
1710         while (ptr < endptr) {
1711                 dup = (xfs_dir2_data_unused_t *)ptr;
1712                 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
1713                         if (lastfree) {
1714                                 do_warn(
1715         _("directory inode %" PRIu64 " block %u has consecutive free entries: "),
1716                                         ip->i_ino, da_bno);
1717                                 if (!no_modify) {
1718
1719                                         do_warn(_("joining together\n"));
1720                                         len = be16_to_cpu(dup->length);
1721                                         libxfs_dir2_data_use_free(&da, bp, dup,
1722                                                 ptr - (char *)d, len, &needlog,
1723                                                 &needscan);
1724                                         libxfs_dir2_data_make_free(&da, bp,
1725                                                 ptr - (char *)d, len, &needlog,
1726                                                 &needscan);
1727                                 } else
1728                                         do_warn(_("would join together\n"));
1729                         }
1730                         ptr += be16_to_cpu(dup->length);
1731                         lastfree = 1;
1732                         continue;
1733                 }
1734                 addr = xfs_dir2_db_off_to_dataptr(mp->m_dir_geo, db,
1735                                                   ptr - (char *)d);
1736                 dep = (xfs_dir2_data_entry_t *)ptr;
1737                 ptr += M_DIROPS(mp)->data_entsize(dep->namelen);
1738                 inum = be64_to_cpu(dep->inumber);
1739                 lastfree = 0;
1740                 /*
1741                  * skip bogus entries (leading '/').  they'll be deleted
1742                  * later.  must still log it, else we leak references to
1743                  * buffers.
1744                  */
1745                 if (dep->name[0] == '/')  {
1746                         nbad++;
1747                         if (!no_modify)
1748                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1749                         continue;
1750                 }
1751
1752                 memmove(fname, dep->name, dep->namelen);
1753                 fname[dep->namelen] = '\0';
1754                 ASSERT(inum != NULLFSINO);
1755
1756                 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, inum),
1757                                         XFS_INO_TO_AGINO(mp, inum));
1758                 if (irec == NULL)  {
1759                         nbad++;
1760                         if (entry_junked(
1761         _("entry \"%s\" in directory inode %" PRIu64 " points to non-existent inode %" PRIu64 ""),
1762                                         fname, ip->i_ino, inum)) {
1763                                 dep->name[0] = '/';
1764                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1765                         }
1766                         continue;
1767                 }
1768                 ino_offset = XFS_INO_TO_AGINO(mp, inum) - irec->ino_startnum;
1769
1770                 /*
1771                  * if it's a free inode, blow out the entry.
1772                  * by now, any inode that we think is free
1773                  * really is free.
1774                  */
1775                 if (is_inode_free(irec, ino_offset))  {
1776                         nbad++;
1777                         if (entry_junked(
1778         _("entry \"%s\" in directory inode %" PRIu64 " points to free inode %" PRIu64),
1779                                         fname, ip->i_ino, inum)) {
1780                                 dep->name[0] = '/';
1781                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1782                         }
1783                         continue;
1784                 }
1785
1786                 /*
1787                  * check if this inode is lost+found dir in the root
1788                  */
1789                 if (inum == mp->m_sb.sb_rootino && strcmp(fname, ORPHANAGE) == 0) {
1790                         /*
1791                          * if it's not a directory, trash it
1792                          */
1793                         if (!inode_isadir(irec, ino_offset)) {
1794                                 nbad++;
1795                                 if (entry_junked(
1796         _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"),
1797                                                 ORPHANAGE, inum, ip->i_ino)) {
1798                                         dep->name[0] = '/';
1799                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1800                                 }
1801                                 continue;
1802                         }
1803                         /*
1804                          * if this is a dup, it will be picked up below,
1805                          * otherwise, mark it as the orphanage for later.
1806                          */
1807                         if (!orphanage_ino)
1808                                 orphanage_ino = inum;
1809                 }
1810
1811                 /*
1812                  * check for duplicate names in directory.
1813                  */
1814                 if (!dir_hash_add(mp, hashtab, addr, inum, dep->namelen,
1815                                 dep->name, M_DIROPS(mp)->data_get_ftype(dep))) {
1816                         nbad++;
1817                         if (entry_junked(
1818         _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"),
1819                                         fname, inum, ip->i_ino)) {
1820                                 dep->name[0] = '/';
1821                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1822                         }
1823                         if (inum == orphanage_ino)
1824                                 orphanage_ino = 0;
1825                         continue;
1826                 }
1827
1828                 /*
1829                  * if just scanning to rebuild a directory due to a ".."
1830                  * update, just continue
1831                  */
1832                 if (dotdot_update)
1833                         continue;
1834
1835                 /*
1836                  * skip the '..' entry since it's checked when the
1837                  * directory is reached by something else.  if it never
1838                  * gets reached, it'll be moved to the orphanage and we'll
1839                  * take care of it then. If it doesn't exist at all, the
1840                  * directory needs to be rebuilt first before being added
1841                  * to the orphanage.
1842                  */
1843                 if (dep->namelen == 2 && dep->name[0] == '.' &&
1844                                 dep->name[1] == '.') {
1845                         if (da_bno != 0) {
1846                                 /* ".." should be in the first block */
1847                                 nbad++;
1848                                 if (entry_junked(
1849         _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is not in the the first block"), fname,
1850                                                 inum, ip->i_ino)) {
1851                                         dep->name[0] = '/';
1852                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1853                                 }
1854                         }
1855                         continue;
1856                 }
1857                 ASSERT(no_modify || !verify_inum(mp, inum));
1858                 /*
1859                  * special case the . entry.  we know there's only one
1860                  * '.' and only '.' points to itself because bogus entries
1861                  * got trashed in phase 3 if there were > 1.
1862                  * bump up link count for '.' but don't set reached
1863                  * until we're actually reached by another directory
1864                  * '..' is already accounted for or will be taken care
1865                  * of when directory is moved to orphanage.
1866                  */
1867                 if (ip->i_ino == inum)  {
1868                         ASSERT(no_modify ||
1869                                (dep->name[0] == '.' && dep->namelen == 1));
1870                         add_inode_ref(current_irec, current_ino_offset);
1871                         if (da_bno != 0 ||
1872                             dep != M_DIROPS(mp)->data_entry_p(d)) {
1873                                 /* "." should be the first entry */
1874                                 nbad++;
1875                                 if (entry_junked(
1876         _("entry \"%s\" in dir %" PRIu64 " is not the first entry"),
1877                                                 fname, inum, ip->i_ino)) {
1878                                         dep->name[0] = '/';
1879                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1880                                 }
1881                         }
1882                         *need_dot = 0;
1883                         continue;
1884                 }
1885                 /*
1886                  * skip entries with bogus inumbers if we're in no modify mode
1887                  */
1888                 if (no_modify && verify_inum(mp, inum))
1889                         continue;
1890
1891                 /* validate ftype field if supported */
1892                 if (xfs_sb_version_hasftype(&mp->m_sb)) {
1893                         uint8_t dir_ftype;
1894                         uint8_t ino_ftype;
1895
1896                         dir_ftype = M_DIROPS(mp)->data_get_ftype(dep);
1897                         ino_ftype = get_inode_ftype(irec, ino_offset);
1898
1899                         if (dir_ftype != ino_ftype) {
1900                                 if (no_modify) {
1901                                         do_warn(
1902         _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
1903                                                 dir_ftype, ino_ftype,
1904                                                 ip->i_ino, inum);
1905                                 } else {
1906                                         do_warn(
1907         _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
1908                                                 dir_ftype, ino_ftype,
1909                                                 ip->i_ino, inum);
1910                                         M_DIROPS(mp)->data_put_ftype(dep,
1911                                                                 ino_ftype);
1912                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1913                                         dir_hash_update_ftype(hashtab, addr,
1914                                                               ino_ftype);
1915                                 }
1916                         }
1917                 }
1918
1919                 /*
1920                  * check easy case first, regular inode, just bump
1921                  * the link count and continue
1922                  */
1923                 if (!inode_isadir(irec, ino_offset))  {
1924                         add_inode_reached(irec, ino_offset);
1925                         continue;
1926                 }
1927                 parent = get_inode_parent(irec, ino_offset);
1928                 ASSERT(parent != 0);
1929                 junkit = 0;
1930                 /*
1931                  * bump up the link counts in parent and child
1932                  * directory but if the link doesn't agree with
1933                  * the .. in the child, blow out the entry.
1934                  * if the directory has already been reached,
1935                  * blow away the entry also.
1936                  */
1937                 if (is_inode_reached(irec, ino_offset))  {
1938                         junkit = 1;
1939                         do_warn(
1940 _("entry \"%s\" in dir %" PRIu64" points to an already connected directory inode %" PRIu64 "\n"),
1941                                 fname, ip->i_ino, inum);
1942                 } else if (parent == ip->i_ino)  {
1943                         add_inode_reached(irec, ino_offset);
1944                         add_inode_ref(current_irec, current_ino_offset);
1945                 } else if (parent == NULLFSINO) {
1946                         /* ".." was missing, but this entry refers to it,
1947                            so, set it as the parent and mark for rebuild */
1948                         do_warn(
1949         _("entry \"%s\" in dir ino %" PRIu64 " doesn't have a .. entry, will set it in ino %" PRIu64 ".\n"),
1950                                 fname, ip->i_ino, inum);
1951                         set_inode_parent(irec, ino_offset, ip->i_ino);
1952                         add_inode_reached(irec, ino_offset);
1953                         add_inode_ref(current_irec, current_ino_offset);
1954                         add_dotdot_update(XFS_INO_TO_AGNO(mp, inum), irec,
1955                                                                 ino_offset);
1956                 } else  {
1957                         junkit = 1;
1958                         do_warn(
1959 _("entry \"%s\" in dir inode %" PRIu64 " inconsistent with .. value (%" PRIu64 ") in ino %" PRIu64 "\n"),
1960                                 fname, ip->i_ino, parent, inum);
1961                 }
1962                 if (junkit)  {
1963                         if (inum == orphanage_ino)
1964                                 orphanage_ino = 0;
1965                         nbad++;
1966                         if (!no_modify)  {
1967                                 dep->name[0] = '/';
1968                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1969                                 if (verbose)
1970                                         do_warn(
1971                                         _("\twill clear entry \"%s\"\n"),
1972                                                 fname);
1973                         } else  {
1974                                 do_warn(_("\twould clear entry \"%s\"\n"),
1975                                         fname);
1976                         }
1977                 }
1978         }
1979         *num_illegal += nbad;
1980         if (needscan)
1981                 libxfs_dir2_data_freescan_int(mp->m_dir_geo, M_DIROPS(mp),
1982                                 d, &i);
1983         if (needlog)
1984                 libxfs_dir2_data_log_header(&da, bp);
1985         libxfs_defer_ijoin(&dfops, ip);
1986         libxfs_defer_finish(&tp, &dfops);
1987         libxfs_trans_commit(tp);
1988
1989         /* record the largest free space in the freetab for later checking */
1990         bf = M_DIROPS(mp)->data_bestfree_p(d);
1991         freetab->ents[db].v = be16_to_cpu(bf[0].length);
1992         freetab->ents[db].s = 0;
1993 }
1994
1995 /*
      * Check v5 metadata.
      *
      * Compares the owner, block number and UUID that the caller pulled out of
      * a directory block header against the values expected for this buffer:
      * the directory inode number (ino), the buffer's own block number
      * (bp->b_bn) and the superblock's sb_meta_uuid.  owner and blkno arrive as
      * __be64 values and are converted with be64_to_cpu() here.
      *
      * Returns 0 if all three match.  On the first mismatch a warning is
      * printed and 1 is returned; the remaining fields are not examined.
      */
1996 static int
1997 __check_dir3_header(
1998         struct xfs_mount        *mp,
1999         struct xfs_buf          *bp,
2000         xfs_ino_t               ino,
2001         __be64                  owner,
2002         __be64                  blkno,
2003         uuid_t                  *uuid)
2004 {
2005
2006         /* verify owner: the header must name the directory inode being checked */
2007         if (be64_to_cpu(owner) != ino) {
2008                 do_warn(
2009 _("expected owner inode %" PRIu64 ", got %llu, directory block %" PRIu64 "\n"),
2010                         ino, (unsigned long long)be64_to_cpu(owner), bp->b_bn);
2011                 return 1;
2012         }
2013         /* verify block number: the header must record the buffer's b_bn */
2014         if (be64_to_cpu(blkno) != bp->b_bn) {
2015                 do_warn(
2016 _("expected block %" PRIu64 ", got %llu, directory inode %" PRIu64 "\n"),
2017                         bp->b_bn, (unsigned long long)be64_to_cpu(blkno), ino);
2018                 return 1;
2019         }
2020         /* verify uuid: must compare equal to the superblock's sb_meta_uuid */
2021         if (platform_uuid_compare(uuid, &mp->m_sb.sb_meta_uuid) != 0) {
2022                 do_warn(
2023 _("wrong FS UUID, directory inode %" PRIu64 " block %" PRIu64 "\n"),
2024                         ino, bp->b_bn);
2025                 return 1;
2026         }
2027
2028         return 0;
2029 }
2030
     /*
      * Check the v5 header fields of a buffer whose contents start with a
      * struct xfs_da3_blkinfo.
      *
      * The owner, blkno and uuid fields of that structure are handed to
      * __check_dir3_header(); its result (0 on match, 1 after a warning on
      * mismatch) is returned unchanged.  No magic number check is done here;
      * the callers in this file only use it once they have seen
      * XFS_DIR3_LEAF1_MAGIC, XFS_DIR3_LEAFN_MAGIC or XFS_DA3_NODE_MAGIC.
      */
2031 static int
2032 check_da3_header(
2033         struct xfs_mount        *mp,
2034         struct xfs_buf          *bp,
2035         xfs_ino_t               ino)
2036 {
2037         struct xfs_da3_blkinfo  *info = bp->b_addr;
2038
2039         return __check_dir3_header(mp, bp, ino, info->owner, info->blkno,
2040                                    &info->uuid);
2041 }
2042
     /*
      * Check the v5 header fields of a buffer whose contents start with a
      * struct xfs_dir3_blk_hdr.
      *
      * Same as check_da3_header() except for the structure used to locate the
      * owner, blkno and uuid fields.  The result of __check_dir3_header() (0 on
      * match, 1 after a warning on mismatch) is returned unchanged.  No magic
      * number check is done here; the caller in this file uses it for blocks
      * carrying XFS_DIR3_FREE_MAGIC.
      */
2043 static int
2044 check_dir3_header(
2045         struct xfs_mount        *mp,
2046         struct xfs_buf          *bp,
2047         xfs_ino_t               ino)
2048 {
2049         struct xfs_dir3_blk_hdr *info = bp->b_addr;
2050
2051         return __check_dir3_header(mp, bp, ino, info->owner, info->blkno,
2052                                    &info->uuid);
2053 }
2054
2055 /*
2056  * Check contents of leaf-form block.
      *
      * Reads the block at mp->m_dir_geo->leafblk of directory ip and checks, in
      * order:
      *   - the leaf header (magic, sibling pointers, entry counts, and that the
      *     entry array does not run into the bests array of the leaf tail);
      *   - for XFS_DIR3_LEAF1_MAGIC blocks, the v5 owner/blkno/uuid fields;
      *   - the leaf entries against hashtab, via dir_hash_see_all() and
      *     dir_hash_check();
      *   - the leaf tail: bestcount must equal freetab->nents and every bests[]
      *     value must equal the corresponding freetab->ents[].v.
      *
      * Returns 0 when every check passes and non-zero (1) after printing a
      * warning when any of them fails.  A read error other than EFSBADCRC or
      * EFSCORRUPTED is reported through do_error().
      */
2057  */
2058 static int
2059 longform_dir2_check_leaf(
2060         xfs_mount_t             *mp,
2061         xfs_inode_t             *ip,
2062         dir_hash_tab_t          *hashtab,
2063         freetab_t               *freetab)
2064 {
2065         int                     badtail;
2066         __be16                  *bestsp;
2067         struct xfs_buf          *bp;
2068         xfs_dablk_t             da_bno;
2069         int                     i;
2070         xfs_dir2_leaf_t         *leaf;
2071         xfs_dir2_leaf_tail_t    *ltp;
2072         int                     seeval;
2073         struct xfs_dir2_leaf_entry *ents;
2074         struct xfs_dir3_icleaf_hdr leafhdr;
2075         int                     error;
2076         int                     fixit = 0;
2077
2078         da_bno = mp->m_dir_geo->leafblk;
2079         error = dir_read_buf(ip, da_bno, -1, &bp, &xfs_dir3_leaf1_buf_ops,
2080                              &fixit);
             /*
              * NOTE(review): the same "bad CRC" warning is printed for
              * EFSCORRUPTED and for a set fixit flag.  Also, if dir_read_buf()
              * can return 0 with fixit set and a buffer in bp (as
              * longform_dir2_check_node() appears to rely on), this return
              * does not release that buffer -- confirm against dir_read_buf().
              */
2081         if (error == EFSBADCRC || error == EFSCORRUPTED || fixit) {
2082                 do_warn(
2083         _("leaf block %u for directory inode %" PRIu64 " bad CRC\n"),
2084                         da_bno, ip->i_ino);
2085                 return 1;
2086         } else if (error) {
2087                 do_error(
2088         _("can't read block %u for directory inode %" PRIu64 ", error %d\n"),
2089                         da_bno, ip->i_ino, error);
2090                 /* NOTREACHED */
2091         }
2092
             /* decode the header and locate the entry array and the tail bests[] */
2093         leaf = bp->b_addr;
2094         M_DIROPS(mp)->leaf_hdr_from_disk(&leafhdr, leaf);
2095         ents = M_DIROPS(mp)->leaf_ents_p(leaf);
2096         ltp = xfs_dir2_leaf_tail_p(mp->m_dir_geo, leaf);
2097         bestsp = xfs_dir2_leaf_bests_p(ltp);
             /*
              * Header sanity: must carry a LEAF1 magic (v2 or v3), have no
              * forw/back sibling pointers, must not have more stale entries
              * than entries, must not exceed leaf_max_ents(), and the end of
              * the entry array must not lie beyond the start of bests[].
              */
2098         if (!(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC ||
2099               leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) ||
2100                                 leafhdr.forw || leafhdr.back ||
2101                                 leafhdr.count < leafhdr.stale ||
2102                                 leafhdr.count >
2103                                         M_DIROPS(mp)->leaf_max_ents(mp->m_dir_geo) ||
2104                                 (char *)&ents[leafhdr.count] > (char *)bestsp) {
2105                 do_warn(
2106         _("leaf block %u for directory inode %" PRIu64 " bad header\n"),
2107                         da_bno, ip->i_ino);
2108                 libxfs_putbuf(bp);
2109                 return 1;
2110         }
2111
             /* only the v3 magic has owner/blkno/uuid fields to verify */
2112         if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) {
2113                 error = check_da3_header(mp, bp, ip->i_ino);
2114                 if (error) {
2115                         libxfs_putbuf(bp);
2116                         return error;
2117                 }
2118         }
2119
             /* cross-check the leaf entries against the hash table built by the caller */
2120         seeval = dir_hash_see_all(hashtab, ents, leafhdr.count, leafhdr.stale);
2121         if (dir_hash_check(hashtab, ip, seeval)) {
2122                 libxfs_putbuf(bp);
2123                 return 1;
2124         }
             /*
              * Tail check.  The loop only runs when bestcount equals
              * freetab->nents, so i stays within the first nents entries of
              * freetab.  Each entry is flagged (s = 1) before its value is
              * compared; the loop stops at the first mismatch.
              */
2125         badtail = freetab->nents != be32_to_cpu(ltp->bestcount);
2126         for (i = 0; !badtail && i < be32_to_cpu(ltp->bestcount); i++) {
2127                 freetab->ents[i].s = 1;
2128                 badtail = freetab->ents[i].v != be16_to_cpu(bestsp[i]);
2129         }
2130         if (badtail) {
2131                 do_warn(
2132         _("leaf block %u for directory inode %" PRIu64 " bad tail\n"),
2133                         da_bno, ip->i_ino);
2134                 libxfs_putbuf(bp);
2135                 return 1;
2136         }
2137         libxfs_putbuf(bp);
             /* a non-zero fixit already took the early return above, so this is 0 */
2138         return fixit;
2139 }
2140
2141 /*
2142  * Check contents of the node blocks (leaves)
2143  * Looks for matching hash values for the data entries.
      *
      * Works in three steps on directory ip:
      *   1. Walk the blocks from m_dir_geo->leafblk up to (not including)
      *      m_dir_geo->freeblk.  Node blocks are skipped after their v5 header
      *      check; leaf blocks have their entry counts checked and their
      *      entries fed to dir_hash_see_all().  dir_hash_check() is then run
      *      on the result.
      *   2. Walk the blocks from m_dir_geo->freeblk onwards.  Each free block's
      *      header is checked and every one of its nvalid bests[] values must
      *      equal the matching freetab->ents[].v; matched entries are flagged
      *      with s = 1.
      *   3. Every freetab entry whose value is not NULLDATAOFF must have been
      *      flagged in step 2.
      *
      * Returns 1 (after a warning) as soon as any check or block read fails.
      * Otherwise returns fixit, which is only ever written by dir_read_buf().
      */
2144  */
2145 static int
2146 longform_dir2_check_node(
2147         xfs_mount_t             *mp,
2148         xfs_inode_t             *ip,
2149         dir_hash_tab_t          *hashtab,
2150         freetab_t               *freetab)
2151 {
2152         struct xfs_buf          *bp;
2153         xfs_dablk_t             da_bno;
2154         xfs_dir2_db_t           fdb;
2155         xfs_dir2_free_t         *free;
2156         int                     i;
2157         xfs_dir2_leaf_t         *leaf;
2158         xfs_fileoff_t           next_da_bno;
2159         int                     seeval = 0;
2160         int                     used;
2161         struct xfs_dir2_leaf_entry *ents;
2162         struct xfs_dir3_icleaf_hdr leafhdr;
2163         struct xfs_dir3_icfree_hdr freehdr;
2164         __be16                  *bests;
2165         int                     error;
2166         int                     fixit = 0;
2167
             /* step 1: leaf and node blocks in [leafblk, freeblk) */
2168         for (da_bno = mp->m_dir_geo->leafblk, next_da_bno = 0;
2169                         next_da_bno != NULLFILEOFF && da_bno < mp->m_dir_geo->freeblk;
2170                         da_bno = (xfs_dablk_t)next_da_bno) {
                     /*
                      * Seed next_da_bno with the last fs block of the current
                      * directory block and let bmap_next_offset() update it; a
                      * non-zero return ends the walk.  NOTE(review): presumably
                      * it is advanced to the next mapped offset, or NULLFILEOFF
                      * when there is none -- confirm against bmap_next_offset().
                      */
2171                 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2172                 if (bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK))
2173                         break;
2174
2175                 /*
2176                  * we need to use the da3 node verifier here as it handles the
2177                  * fact that reading the leaf hash tree blocks can return either
2178                  * leaf or node blocks and calls the correct verifier. If we get
2179                  * a node block, then we'll skip it below based on a magic
2180                  * number check.
2181                  */
2182                 error = dir_read_buf(ip, da_bno, -1, &bp,
2183                                      &xfs_da3_node_buf_ops, &fixit);
2184                 if (error) {
2185                         do_warn(
2186         _("can't read leaf block %u for directory inode %" PRIu64 ", error %d\n"),
2187                                 da_bno, ip->i_ino, error);
2188                         return 1;
2189                 }
2190                 leaf = bp->b_addr;
2191                 M_DIROPS(mp)->leaf_hdr_from_disk(&leafhdr, leaf);
2192                 ents = M_DIROPS(mp)->leaf_ents_p(leaf);
                     /* only leafn and da node magics (v2 or v3) are acceptable here */
2193                 if (!(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
2194                       leafhdr.magic == XFS_DIR3_LEAFN_MAGIC ||
2195                       leafhdr.magic == XFS_DA_NODE_MAGIC ||
2196                       leafhdr.magic == XFS_DA3_NODE_MAGIC)) {
2197                         do_warn(
2198         _("unknown magic number %#x for block %u in directory inode %" PRIu64 "\n"),
2199                                 leafhdr.magic, da_bno, ip->i_ino);
2200                         libxfs_putbuf(bp);
2201                         return 1;
2202                 }
2203
2204                 /* check v5 metadata */
2205                 if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC ||
2206                     leafhdr.magic == XFS_DA3_NODE_MAGIC) {
2207                         error = check_da3_header(mp, bp, ip->i_ino);
2208                         if (error) {
2209                                 libxfs_putbuf(bp);
2210                                 return error;
2211                         }
2212                 }
2213
2214                 /* ignore nodes */
2215                 if (leafhdr.magic == XFS_DA_NODE_MAGIC ||
2216                     leafhdr.magic == XFS_DA3_NODE_MAGIC) {
2217                         libxfs_putbuf(bp);
2218                         continue;
2219                 }
2220
2221                 /*
2222                  * If there's a validator error, we need to ensure that we got
2223                  * the right ops on the buffer for when we write it back out.
2224                  */
2225                 bp->b_ops = &xfs_dir3_leafn_buf_ops;
                     /* entry count must fit the block and cover the stale count */
2226                 if (leafhdr.count > M_DIROPS(mp)->leaf_max_ents(mp->m_dir_geo) ||
2227                     leafhdr.count < leafhdr.stale) {
2228                         do_warn(
2229         _("leaf block %u for directory inode %" PRIu64 " bad header\n"),
2230                                 da_bno, ip->i_ino);
2231                         libxfs_putbuf(bp);
2232                         return 1;
2233                 }
2234                 seeval = dir_hash_see_all(hashtab, ents,
2235                                         leafhdr.count, leafhdr.stale);
2236                 libxfs_putbuf(bp);
2237                 if (seeval != DIR_HASH_CK_OK)
2238                         return 1;
2239         }
             /* seeval is the last leaf's result, or its initial 0 if no leaf was seen */
2240         if (dir_hash_check(hashtab, ip, seeval))
2241                 return 1;
2242
             /* step 2: free index blocks, starting at freeblk */
2243         for (da_bno = mp->m_dir_geo->freeblk, next_da_bno = 0;
2244              next_da_bno != NULLFILEOFF;
2245              da_bno = (xfs_dablk_t)next_da_bno) {
2246                 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2247                 if (bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK))
2248                         break;
2249
2250                 error = dir_read_buf(ip, da_bno, -1, &bp,
2251                                      &xfs_dir3_free_buf_ops, &fixit);
2252                 if (error) {
2253                         do_warn(
2254         _("can't read freespace block %u for directory inode %" PRIu64 ", error %d\n"),
2255                                 da_bno, ip->i_ino, error);
2256                         return 1;
2257                 }
2258                 free = bp->b_addr;
2259                 M_DIROPS(mp)->free_hdr_from_disk(&freehdr, free);
2260                 bests = M_DIROPS(mp)->free_bests_p(free);
2261                 fdb = xfs_dir2_da_to_db(mp->m_dir_geo, da_bno);
                     /*
                      * Header sanity: free magic (v2 or v3); firstdb must be this
                      * block's index past XFS_DIR2_FREE_OFFSET multiplied by
                      * free_max_bests(); nused must not exceed nvalid.
                      */
2262                 if (!(freehdr.magic == XFS_DIR2_FREE_MAGIC ||
2263                       freehdr.magic == XFS_DIR3_FREE_MAGIC) ||
2264                     freehdr.firstdb !=
2265                         (fdb - xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) *
2266                         M_DIROPS(mp)->free_max_bests(mp->m_dir_geo) ||
2267                     freehdr.nvalid < freehdr.nused) {
2268                         do_warn(
2269         _("free block %u for directory inode %" PRIu64 " bad header\n"),
2270                                 da_bno, ip->i_ino);
2271                         libxfs_putbuf(bp);
2272                         return 1;
2273                 }
2274
                     /* only the v3 magic has owner/blkno/uuid fields to verify */
2275                 if (freehdr.magic == XFS_DIR3_FREE_MAGIC) {
2276                         error = check_dir3_header(mp, bp, ip->i_ino);
2277                         if (error) {
2278                                 libxfs_putbuf(bp);
2279                                 return error;
2280                         }
2281                 }
                     /*
                      * Each valid bests[] slot must refer to a data block that
                      * freetab knows about (index below nents) and must hold the
                      * same value.  "used" counts the slots that are not
                      * NULLDATAOFF so it can be compared with the header's nused.
                      */
2282                 for (i = used = 0; i < freehdr.nvalid; i++) {
2283                         if (i + freehdr.firstdb >= freetab->nents ||
2284                                         freetab->ents[i + freehdr.firstdb].v !=
2285                                                 be16_to_cpu(bests[i])) {
2286                                 do_warn(
2287         _("free block %u entry %i for directory ino %" PRIu64 " bad\n"),
2288                                         da_bno, i, ip->i_ino);
2289                                 libxfs_putbuf(bp);
2290                                 return 1;
2291                         }
2292                         used += be16_to_cpu(bests[i]) != NULLDATAOFF;
2293                         freetab->ents[i + freehdr.firstdb].s = 1;
2294                 }
2295                 if (used != freehdr.nused) {
2296                         do_warn(
2297         _("free block %u for directory inode %" PRIu64 " bad nused\n"),
2298                                 da_bno, ip->i_ino);
2299                         libxfs_putbuf(bp);
2300                         return 1;
2301                 }
2302                 libxfs_putbuf(bp);
2303         }
             /* step 3: any freetab entry with a real value must have been flagged above */
2304         for (i = 0; i < freetab->nents; i++) {
2305                 if ((freetab->ents[i].s == 0) &&
2306                     (freetab->ents[i].v != NULLDATAOFF)) {
2307                         do_warn(
2308         _("missing freetab entry %u for directory inode %" PRIu64 "\n"),
2309                                 i, ip->i_ino);
2310                         return 1;
2311                 }
2312         }
2313         return fixit;
2314 }
2315
2316 /*
2317  * If a directory is corrupt, we need to read in as many entries as possible,
2318  * destroy the entry and create a new one with recovered name/inode pairs.
2319  * (ie. get libxfs to do all the grunt work)
2320  */
2321 static void
2322 longform_dir2_entry_check(xfs_mount_t   *mp,
2323                         xfs_ino_t       ino,
2324                         xfs_inode_t     *ip,
2325                         int             *num_illegal,
2326                         int             *need_dot,
2327                         ino_tree_node_t *irec,
2328                         int             ino_offset,
2329                         dir_hash_tab_t  *hashtab)
2330 {
2331         struct xfs_buf          **bplist;
2332         xfs_dablk_t             da_bno;
2333         freetab_t               *freetab;
2334         int                     num_bps;
2335         int                     i;
2336         int                     isblock;
2337         int                     isleaf;
2338         xfs_fileoff_t           next_da_bno;
2339         int                     seeval;
2340         int                     fixit = 0;
2341         xfs_dir2_db_t           db;
2342         struct xfs_da_args      args;
2343
2344         *need_dot = 1;
2345         freetab = malloc(FREETAB_SIZE(ip->i_d.di_size / mp->m_dir_geo->blksize));
2346         if (!freetab) {
2347                 do_error(_("malloc failed in %s (%" PRId64 " bytes)\n"),
2348                         __func__,
2349                         FREETAB_SIZE(ip->i_d.di_size / mp->m_dir_geo->blksize));
2350                 exit(1);
2351         }
2352         freetab->naents = ip->i_d.di_size / mp->m_dir_geo->blksize;
2353         freetab->nents = 0;
2354         for (i = 0; i < freetab->naents; i++) {
2355                 freetab->ents[i].v = NULLDATAOFF;
2356                 freetab->ents[i].s = 0;
2357         }
2358         num_bps = freetab->naents;
2359         bplist = calloc(num_bps, sizeof(struct xfs_buf*));
2360         if (!bplist)
2361                 do_error(_("calloc failed in %s (%zu bytes)\n"),
2362                         __func__, num_bps * sizeof(struct xfs_buf*));
2363
2364         /* is this a block, leaf, or node directory? */
2365         args.dp = ip;
2366         args.geo = mp->m_dir_geo;
2367         libxfs_dir2_isblock(&args, &isblock);
2368         libxfs_dir2_isleaf(&args, &isleaf);
2369
2370         /* check directory "data" blocks (ie. name/inode pairs) */
2371         for (da_bno = 0, next_da_bno = 0;
2372              next_da_bno != NULLFILEOFF && da_bno < mp->m_dir_geo->leafblk;
2373              da_bno = (xfs_dablk_t)next_da_bno) {
2374                 const struct xfs_buf_ops *ops;
2375                 int                      error;
2376                 struct xfs_dir2_data_hdr *d;
2377
2378                 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2379                 if (bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK)) {
2380                         /*
2381                          * if this is the first block, there isn't anything we
2382                          * can recover so we just trash it.
2383                          */
2384                          if (da_bno == 0) {
2385                                 fixit++;
2386                                 goto out_fix;
2387                         }
2388                         break;
2389                 }
2390
2391                 db = xfs_dir2_da_to_db(mp->m_dir_geo, da_bno);
2392                 if (db >= num_bps) {
2393                         /* more data blocks than expected */
2394                         num_bps = db + 1;
2395                         bplist = realloc(bplist, num_bps * sizeof(struct xfs_buf*));
2396                         if (!bplist)
2397                                 do_error(_("realloc failed in %s (%zu bytes)\n"),
2398                                         __func__,
2399                                         num_bps * sizeof(struct xfs_buf*));
2400                 }
2401
2402                 if (isblock)
2403                         ops = &xfs_dir3_block_buf_ops;
2404                 else
2405                         ops = &xfs_dir3_data_buf_ops;
2406
2407                 error = dir_read_buf(ip, da_bno, -1, &bplist[db], ops, &fixit);
2408                 if (error) {
2409                         do_warn(
2410         _("can't read data block %u for directory inode %" PRIu64 " error %d\n"),
2411                                 da_bno, ino, error);
2412                         *num_illegal += 1;
2413
2414                         /*
2415                          * we try to read all "data" blocks, but if we are in
2416                          * block form and we fail, there isn't anything else to
2417                          * read, and nothing we can do but trash it.
2418                          */
2419                         if (isblock) {
2420                                 fixit++;
2421                                 goto out_fix;
2422                         }
2423                         continue;
2424                 }
2425
2426                 /* check v5 metadata */
2427                 d = bplist[db]->b_addr;
2428                 if (be32_to_cpu(d->magic) == XFS_DIR3_BLOCK_MAGIC ||
2429                     be32_to_cpu(d->magic) == XFS_DIR3_DATA_MAGIC) {
2430                         struct xfs_buf           *bp = bplist[db];
2431
2432                         error = check_dir3_header(mp, bp, ino);
2433                         if (error) {
2434                                 fixit++;
2435                                 continue;
2436                         }
2437                 }
2438
2439                 longform_dir2_entry_check_data(mp, ip, num_illegal, need_dot,
2440                                 irec, ino_offset, &bplist[db], hashtab,
2441                                 &freetab, da_bno, isblock);
2442         }
2443         fixit |= (*num_illegal != 0) || dir2_is_badino(ino) || *need_dot;
2444
2445         if (!dotdot_update) {
2446                 /* check btree and freespace */
2447                 if (isblock) {
2448                         struct xfs_dir2_data_hdr *block;
2449                         xfs_dir2_block_tail_t   *btp;
2450                         xfs_dir2_leaf_entry_t   *blp;
2451
2452                         block = bplist[0]->b_addr;
2453                         btp = xfs_dir2_block_tail_p(mp->m_dir_geo, block);
2454                         blp = xfs_dir2_block_leaf_p(btp);
2455                         seeval = dir_hash_see_all(hashtab, blp,
2456                                                 be32_to_cpu(btp->count),
2457                                                 be32_to_cpu(btp->stale));
2458                         if (dir_hash_check(hashtab, ip, seeval))
2459                                 fixit |= 1;
2460                 } else if (isleaf) {
2461                         fixit |= longform_dir2_check_leaf(mp, ip, hashtab,
2462                                                                 freetab);
2463                 } else {
2464                         fixit |= longform_dir2_check_node(mp, ip, hashtab,
2465                                                                 freetab);
2466                 }
2467         }
2468 out_fix:
2469         if (!no_modify && (fixit || dotdot_update)) {
2470                 dir_hash_dup_names(hashtab);
2471                 for (i = 0; i < num_bps; i++)
2472                         if (bplist[i])
2473                                 libxfs_putbuf(bplist[i]);
2474                 longform_dir2_rebuild(mp, ino, ip, irec, ino_offset, hashtab);
2475                 *num_illegal = 0;
2476                 *need_dot = 0;
2477         } else {
2478                 for (i = 0; i < num_bps; i++)
2479                         if (bplist[i])
2480                                 libxfs_putbuf(bplist[i]);
2481         }
2482
2483         free(bplist);
2484         free(freetab);
2485 }
2486
2487 /*
2488  * shortform directory v2 processing routines -- entry verification and
2489  * bad entry deletion (pruning).
2490  */
2491 static struct xfs_dir2_sf_entry *
2492 shortform_dir2_junk(
2493         struct xfs_mount        *mp,
2494         struct xfs_dir2_sf_hdr  *sfp,
2495         struct xfs_dir2_sf_entry *sfep,
2496         xfs_ino_t               lino,
2497         int                     *max_size,
2498         int                     *index,
2499         int                     *bytes_deleted,
2500         int                     *ino_dirty)
2501 {
2502         struct xfs_dir2_sf_entry *next_sfep;
2503         int                     next_len;
2504         int                     next_elen;
2505
2506         if (lino == orphanage_ino)
2507                 orphanage_ino = 0;
2508
2509         next_elen = M_DIROPS(mp)->sf_entsize(sfp, sfep->namelen);
2510         next_sfep = M_DIROPS(mp)->sf_nextentry(sfp, sfep);
2511
2512         /*
2513          * if we are just checking, simply return the pointer to the next entry
2514          * here so that the checking loop can continue.
2515          */
2516         if (no_modify) {
2517                 do_warn(_("would junk entry\n"));
2518                 return next_sfep;
2519         }
2520
2521         /*
2522          * now move all the remaining entries down over the junked entry and
2523          * clear the newly unused bytes at the tail of the directory region.
2524          */
2525         next_len = *max_size - ((intptr_t)next_sfep - (intptr_t)sfp);
2526         *max_size -= next_elen;
2527         *bytes_deleted += next_elen;
2528
2529         memmove(sfep, next_sfep, next_len);
2530         memset((void *)((intptr_t)sfep + next_len), 0, next_elen);
2531         sfp->count -= 1;
2532         *ino_dirty = 1;
2533
2534         /*
2535          * WARNING:  drop the index i by one so it matches the decremented count
2536          * for accurate comparisons in the loop test
2537          */
2538         (*index)--;
2539
2540         if (verbose)
2541                 do_warn(_("junking entry\n"));
2542         else
2543                 do_warn("\n");
2544         return sfep;
2545 }
2546
/*
 * Check the entries of a shortform directory held in the data fork of ip.
 *
 * When the global dotdot_update is set, only the parent pointer in the
 * shortform header is (or, with no_modify, would be) rewritten from the
 * parent recorded in the in-core inode tree, and the function returns.
 *
 * Otherwise every entry is looked up in the in-core inode records and is
 * junked through shortform_dir2_junk() when it
 *   - references an inode that has no in-core record,
 *   - references a free inode,
 *   - is named ORPHANAGE in the root directory but is not a directory,
 *   - duplicates a name already added to hashtab,
 *   - references a directory that has already been reached, or
 *   - references a directory whose recorded parent is some other inode.
 * Entries that survive bump the reached/reference counts.  Mismatching
 * ftype fields and a wrong i8count are reported and, unless no_modify is
 * set, corrected.  Finally the fork and di_size are shrunk by the number
 * of bytes that were deleted.
 *
 * mp                   mount structure
 * ino                  inode number of the directory being checked
 * ip                   in-core inode of the directory
 * ino_dirty            out: cleared on entry, set to 1 whenever the
 *                      shortform data or di_size is changed
 * current_irec         in-core inode record of the directory itself
 * current_ino_offset   offset of the directory within current_irec
 * hashtab              name hash table used to detect duplicate names
 */
static void
shortform_dir2_entry_check(xfs_mount_t  *mp,
                        xfs_ino_t       ino,
                        xfs_inode_t     *ip,
                        int             *ino_dirty,
                        ino_tree_node_t *current_irec,
                        int             current_ino_offset,
                        dir_hash_tab_t  *hashtab)
{
        xfs_ino_t               lino;
        xfs_ino_t               parent;
        struct xfs_dir2_sf_hdr  *sfp;
        struct xfs_dir2_sf_entry *sfep;
        struct xfs_dir2_sf_entry *next_sfep;
        struct xfs_ifork        *ifp;
        struct ino_tree_node    *irec;
        int                     max_size;
        int                     ino_offset;
        int                     i;
        int                     bad_sfnamelen;
        int                     namelen;
        int                     bytes_deleted;
        char                    fname[MAXNAMELEN + 1];
        int                     i8;

        ifp = &ip->i_df;
        sfp = (struct xfs_dir2_sf_hdr *) ifp->if_u1.if_data;
        *ino_dirty = 0;
        bytes_deleted = 0;

        /* max_size shrinks as entries are junked; it bounds the scan below */
        max_size = ifp->if_bytes;
        ASSERT(ip->i_d.di_size <= ifp->if_bytes);

        /*
         * if just rebuild a directory due to a "..", update and return
         */
        if (dotdot_update) {
                parent = get_inode_parent(current_irec, current_ino_offset);
                if (no_modify) {
                        do_warn(
        _("would set .. in sf dir inode %" PRIu64 " to %" PRIu64 "\n"),
                                ino, parent);
                } else {
                        do_warn(
        _("setting .. in sf dir inode %" PRIu64 " to %" PRIu64 "\n"),
                                ino, parent);
                        M_DIROPS(mp)->sf_put_parent_ino(sfp, parent);
                        *ino_dirty = 1;
                }
                return;
        }

        /*
         * no '.' entry in shortform dirs, just bump up ref count by 1
         * '..' was already (or will be) accounted for and checked when
         * the directory is reached or will be taken care of when the
         * directory is moved to orphanage.
         */
        add_inode_ref(current_irec, current_ino_offset);

        /*
         * Initialise i8 counter -- the parent inode number counts as well.
         */
        i8 = M_DIROPS(mp)->sf_get_parent_ino(sfp) > XFS_DIR2_MAX_SHORT_INUM;

        /*
         * now run through entries, stop at first bad entry, don't need
         * to skip over '..' since that's encoded in its own field and
         * no need to worry about '.' since it doesn't exist.
         */
        sfep = next_sfep = xfs_dir2_sf_firstentry(sfp);

        /*
         * Every path through the body must leave next_sfep pointing at the
         * entry to look at next, because the loop increment copies it into
         * sfep.  The scan ends once count entries were visited or the next
         * entry would start at or beyond max_size.
         */
        for (i = 0; i < sfp->count && max_size >
                                        (intptr_t)next_sfep - (intptr_t)sfp;
                        sfep = next_sfep, i++)  {
                bad_sfnamelen = 0;

                lino = M_DIROPS(mp)->sf_get_ino(sfp, sfep);

                namelen = sfep->namelen;

                ASSERT(no_modify || namelen > 0);

                if (no_modify && namelen == 0)  {
                        /*
                         * if we're really lucky, this is
                         * the last entry in which case we
                         * can use the dir size to set the
                         * namelen value.  otherwise, forget
                         * it because we're not going to be
                         * able to find the next entry.
                         */
                        bad_sfnamelen = 1;

                        if (i == sfp->count - 1)  {
                                namelen = ip->i_d.di_size -
                                        ((intptr_t) &sfep->name[0] -
                                         (intptr_t) sfp);
                        } else  {
                                /*
                                 * don't process the rest of the directory,
                                 * break out of processing loop
                                 */
                                break;
                        }
                } else if (no_modify && (intptr_t) sfep - (intptr_t) sfp +
                                + M_DIROPS(mp)->sf_entsize(sfp, sfep->namelen)
                                > ip->i_d.di_size)  {
                        /*
                         * the entry as described by its namelen would run
                         * past the end of the directory; same recovery as
                         * for a zero namelen above
                         */
                        bad_sfnamelen = 1;

                        if (i == sfp->count - 1)  {
                                namelen = ip->i_d.di_size -
                                        ((intptr_t) &sfep->name[0] -
                                         (intptr_t) sfp);
                        } else  {
                                /*
                                 * don't process the rest of the directory,
                                 * break out of processing loop
                                 */
                                break;
                        }
                }

                /*
                 * NUL-terminated copy of the name for messages and the
                 * ORPHANAGE comparison.  Note that this uses the on-disk
                 * sfep->namelen, not the recomputed local namelen.
                 */
                memmove(fname, sfep->name, sfep->namelen);
                fname[sfep->namelen] = '\0';

                ASSERT(no_modify || (lino != NULLFSINO && lino != 0));
                ASSERT(no_modify || !verify_inum(mp, lino));

                /*
                 * Also skip entries with bogus inode numbers if we're
                 * in no modify mode.
                 */

                if (no_modify && verify_inum(mp, lino))  {
                        next_sfep = M_DIROPS(mp)->sf_nextentry(sfp, sfep);
                        continue;
                }

                irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, lino),
                                        XFS_INO_TO_AGINO(mp, lino));

                if (irec == NULL)  {
                        do_warn(
        _("entry \"%s\" in shortform directory %" PRIu64 " references non-existent inode %" PRIu64 "\n"),
                                fname, ino, lino);
                        /*
                         * shortform_dir2_junk() adjusts max_size, i and
                         * bytes_deleted and returns the entry to visit next
                         */
                        next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
                                                &max_size, &i, &bytes_deleted,
                                                ino_dirty);
                        continue;
                }

                ino_offset = XFS_INO_TO_AGINO(mp, lino) - irec->ino_startnum;

                /*
                 * if it's a free inode, blow out the entry.
                 * by now, any inode that we think is free
                 * really is free.
                 */
                if (is_inode_free(irec, ino_offset))  {
                        do_warn(
        _("entry \"%s\" in shortform directory inode %" PRIu64 " points to free inode %" PRIu64 "\n"),
                                fname, ino, lino);
                        next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
                                                &max_size, &i, &bytes_deleted,
                                                ino_dirty);
                        continue;
                }
                /*
                 * check if this inode is lost+found dir in the root
                 */
                if (ino == mp->m_sb.sb_rootino && strcmp(fname, ORPHANAGE) == 0) {
                        /*
                         * if it's not a directory, trash it
                         */
                        if (!inode_isadir(irec, ino_offset)) {
                                /*
                                 * no newline here: the message issued by
                                 * shortform_dir2_junk() ends the line
                                 */
                                do_warn(
        _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"),
                                        ORPHANAGE, lino, ino);
                                next_sfep = shortform_dir2_junk(mp, sfp, sfep,
                                                lino, &max_size, &i,
                                                &bytes_deleted, ino_dirty);
                                continue;
                        }
                        /*
                         * if this is a dup, it will be picked up below,
                         * otherwise, mark it as the orphanage for later.
                         */
                        if (!orphanage_ino)
                                orphanage_ino = lino;
                }
                /*
                 * check for duplicate names in directory.
                 */
                if (!dir_hash_add(mp, hashtab, (xfs_dir2_dataptr_t)
                                (sfep - xfs_dir2_sf_firstentry(sfp)),
                                lino, sfep->namelen, sfep->name,
                                M_DIROPS(mp)->sf_get_ftype(sfep))) {
                        /* message is completed by shortform_dir2_junk() */
                        do_warn(
_("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"),
                                fname, lino, ino);
                        next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
                                                &max_size, &i, &bytes_deleted,
                                                ino_dirty);
                        continue;
                }

                if (!inode_isadir(irec, ino_offset))  {
                        /*
                         * check easy case first, regular inode, just bump
                         * the link count
                         */
                        add_inode_reached(irec, ino_offset);
                } else  {
                        parent = get_inode_parent(irec, ino_offset);

                        /*
                         * bump up the link counts in parent and child.
                         * directory but if the link doesn't agree with
                         * the .. in the child, blow out the entry
                         */
                        if (is_inode_reached(irec, ino_offset))  {
                                do_warn(
        _("entry \"%s\" in directory inode %" PRIu64
          " references already connected inode %" PRIu64 ".\n"),
                                        fname, ino, lino);
                                next_sfep = shortform_dir2_junk(mp, sfp, sfep,
                                                lino, &max_size, &i,
                                                &bytes_deleted, ino_dirty);
                                continue;
                        } else if (parent == ino)  {
                                /* child's .. agrees with us: count both links */
                                add_inode_reached(irec, ino_offset);
                                add_inode_ref(current_irec, current_ino_offset);
                        } else if (parent == NULLFSINO) {
                                /* ".." was missing, but this entry refers to it,
                                so, set it as the parent and mark for rebuild */
                                do_warn(
        _("entry \"%s\" in dir ino %" PRIu64 " doesn't have a .. entry, will set it in ino %" PRIu64 ".\n"),
                                        fname, ino, lino);
                                set_inode_parent(irec, ino_offset, ino);
                                add_inode_reached(irec, ino_offset);
                                add_inode_ref(current_irec, current_ino_offset);
                                add_dotdot_update(XFS_INO_TO_AGNO(mp, lino),
                                                        irec, ino_offset);
                        } else  {
                                do_warn(
        _("entry \"%s\" in directory inode %" PRIu64
          " not consistent with .. value (%" PRIu64
          ") in inode %" PRIu64 ",\n"),
                                        fname, ino, parent, lino);
                                next_sfep = shortform_dir2_junk(mp, sfp, sfep,
                                                lino, &max_size, &i,
                                                &bytes_deleted, ino_dirty);
                                continue;
                        }
                }

                /* validate ftype field if supported */
                if (xfs_sb_version_hasftype(&mp->m_sb)) {
                        uint8_t dir_ftype;
                        uint8_t ino_ftype;

                        dir_ftype = M_DIROPS(mp)->sf_get_ftype(sfep);
                        ino_ftype = get_inode_ftype(irec, ino_offset);

                        if (dir_ftype != ino_ftype) {
                                if (no_modify) {
                                        do_warn(
        _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
                                                dir_ftype, ino_ftype,
                                                ino, lino);
                                } else {
                                        do_warn(
        _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
                                                dir_ftype, ino_ftype,
                                                ino, lino);
                                        /*
                                         * fix the entry and the copy of the
                                         * ftype kept in the hash table
                                         */
                                        M_DIROPS(mp)->sf_put_ftype(sfep,
                                                                ino_ftype);
                                        dir_hash_update_ftype(hashtab,
                        (xfs_dir2_dataptr_t)(sfep - xfs_dir2_sf_firstentry(sfp)),
                                                              ino_ftype);
                                        *ino_dirty = 1;
                                }
                        }
                }

                /* count surviving entries whose inode number is above the short limit */
                if (lino > XFS_DIR2_MAX_SHORT_INUM)
                        i8++;

                /*
                 * go onto next entry - we have to take entries with bad namelen
                 * into account in no modify mode since we calculate size based
                 * on next_sfep.
                 */
                ASSERT(no_modify || bad_sfnamelen == 0);
                next_sfep = (struct xfs_dir2_sf_entry *)((intptr_t)sfep +
                              (bad_sfnamelen
                                ? M_DIROPS(mp)->sf_entsize(sfp, namelen)
                                : M_DIROPS(mp)->sf_entsize(sfp, sfep->namelen)));
        }

        /*
         * the header's i8count must match the number of large inode numbers
         * counted above (parent included)
         */
        if (sfp->i8count != i8) {
                if (no_modify) {
                        do_warn(_("would fix i8count in inode %" PRIu64 "\n"),
                                ino);
                } else {
                        if (i8 == 0) {
                                struct xfs_dir2_sf_entry *tmp_sfep;

                                /*
                                 * process_sf_dir2_fixi8() may move the end
                                 * pointer handed to it; whatever distance it
                                 * moved back is added to bytes_deleted.
                                 * NOTE(review): presumably this converts the
                                 * directory to the short inode number form --
                                 * confirm against process_sf_dir2_fixi8().
                                 */
                                tmp_sfep = next_sfep;
                                process_sf_dir2_fixi8(mp, sfp, &tmp_sfep);
                                bytes_deleted +=
                                        (intptr_t)next_sfep -
                                        (intptr_t)tmp_sfep;
                                next_sfep = tmp_sfep;
                        } else
                                sfp->i8count = i8;
                        *ino_dirty = 1;
                        do_warn(_("fixing i8count in inode %" PRIu64 "\n"),
                                ino);
                }
        }

        /*
         * sync up sizes if required
         */
        if (*ino_dirty && bytes_deleted > 0)  {
                ASSERT(!no_modify);
                libxfs_idata_realloc(ip, -bytes_deleted, XFS_DATA_FORK);
                ip->i_d.di_size -= bytes_deleted;
        }

        /*
         * if the inode size still disagrees with the fork size, the fork is
         * expected (asserted) to end exactly at next_sfep; make di_size match
         */
        if (ip->i_d.di_size != ip->i_df.if_bytes)  {
                ASSERT(ip->i_df.if_bytes == (xfs_fsize_t)
                                ((intptr_t) next_sfep - (intptr_t) sfp));
                ip->i_d.di_size = (xfs_fsize_t)
                                ((intptr_t) next_sfep - (intptr_t) sfp);
                do_warn(
        _("setting size to %" PRId64 " bytes to reflect junked entries\n"),
                        ip->i_d.di_size);
                *ino_dirty = 1;
        }
}
2890
2891 /*
2892  * processes all reachable inodes in directories
2893  */
2894 static void
2895 process_dir_inode(
2896         xfs_mount_t             *mp,
2897         xfs_agnumber_t          agno,
2898         ino_tree_node_t         *irec,
2899         int                     ino_offset)
2900 {
2901         xfs_ino_t               ino;
2902         struct xfs_defer_ops            dfops;
2903         xfs_fsblock_t           first;
2904         xfs_inode_t             *ip;
2905         xfs_trans_t             *tp;
2906         dir_hash_tab_t          *hashtab;
2907         int                     need_dot;
2908         int                     dirty, num_illegal, error, nres;
2909
2910         ino = XFS_AGINO_TO_INO(mp, agno, irec->ino_startnum + ino_offset);
2911
2912         /*
2913          * open up directory inode, check all entries,
2914          * then call prune_dir_entries to remove all
2915          * remaining illegal directory entries.
2916          */
2917
2918         ASSERT(!is_inode_refchecked(irec, ino_offset) || dotdot_update);
2919
2920         error = -libxfs_iget(mp, NULL, ino, 0, &ip, &phase6_ifork_ops);
2921         if (error) {
2922                 if (!no_modify)
2923                         do_error(
2924         _("couldn't map inode %" PRIu64 ", err = %d\n"),
2925                                 ino, error);
2926                 else  {
2927                         do_warn(
2928         _("couldn't map inode %" PRIu64 ", err = %d\n"),
2929                                 ino, error);
2930                         /*
2931                          * see below for what we're doing if this
2932                          * is root.  Why do we need to do this here?
2933                          * to ensure that the root doesn't show up
2934                          * as being disconnected in the no_modify case.
2935                          */
2936                         if (mp->m_sb.sb_rootino == ino)  {
2937                                 add_inode_reached(irec, 0);
2938                                 add_inode_ref(irec, 0);
2939                         }
2940                 }
2941
2942                 add_inode_refchecked(irec, 0);
2943                 return;
2944         }
2945
2946         need_dot = dirty = num_illegal = 0;
2947
2948         if (mp->m_sb.sb_rootino == ino)  {
2949                 /*
2950                  * mark root inode reached and bump up
2951                  * link count for root inode to account
2952                  * for '..' entry since the root inode is
2953                  * never reached by a parent.  we know
2954                  * that root's '..' is always good --
2955                  * guaranteed by phase 3 and/or below.
2956                  */
2957                 add_inode_reached(irec, ino_offset);
2958         }
2959
2960         add_inode_refchecked(irec, ino_offset);
2961
2962         hashtab = dir_hash_init(ip->i_d.di_size);
2963
2964         /*
2965          * look for bogus entries
2966          */
2967         switch (ip->i_d.di_format)  {
2968                 case XFS_DINODE_FMT_EXTENTS:
2969                 case XFS_DINODE_FMT_BTREE:
2970                         /*
2971                          * also check for missing '.' in longform dirs.
2972                          * missing .. entries are added if required when
2973                          * the directory is connected to lost+found. but
2974                          * we need to create '.' entries here.
2975                          */
2976                         longform_dir2_entry_check(mp, ino, ip,
2977                                                 &num_illegal, &need_dot,
2978                                                 irec, ino_offset,
2979                                                 hashtab);
2980                         break;
2981
2982                 case XFS_DINODE_FMT_LOCAL:
2983                         /*
2984                          * using the remove reservation is overkill
2985                          * since at most we'll only need to log the
2986                          * inode but it's easier than wedging a
2987                          * new define in ourselves.
2988                          */
2989                         nres = no_modify ? 0 : XFS_REMOVE_SPACE_RES(mp);
2990                         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove,
2991                                                     nres, 0, 0, &tp);
2992                         if (error)
2993                                 res_failed(error);
2994
2995                         libxfs_trans_ijoin(tp, ip, 0);
2996
2997                         shortform_dir2_entry_check(mp, ino, ip, &dirty,
2998                                                 irec, ino_offset,
2999                                                 hashtab);
3000
3001                         ASSERT(dirty == 0 || (dirty && !no_modify));
3002                         if (dirty)  {
3003                                 libxfs_trans_log_inode(tp, ip,
3004                                         XFS_ILOG_CORE | XFS_ILOG_DDATA);
3005                                 libxfs_trans_commit(tp);
3006                         } else  {
3007                                 libxfs_trans_cancel(tp);
3008                         }
3009                         break;
3010
3011                 default:
3012                         break;
3013         }
3014         dir_hash_done(hashtab);
3015
3016         /*
3017          * if we have to create a .. for /, do it now *before*
3018          * we delete the bogus entries, otherwise the directory
3019          * could transform into a shortform dir which would
3020          * probably cause the simulation to choke.  Even
3021          * if the illegal entries get shifted around, it's ok
3022          * because the entries are structurally intact and in
3023          * in hash-value order so the simulation won't get confused
3024          * if it has to move them around.
3025          */
3026         if (!no_modify && need_root_dotdot && ino == mp->m_sb.sb_rootino)  {
3027                 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_LOCAL);
3028
3029                 do_warn(_("recreating root directory .. entry\n"));
3030
3031                 nres = XFS_MKDIR_SPACE_RES(mp, 2);
3032                 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir,
3033                                             nres, 0, 0, &tp);
3034                 if (error)
3035                         res_failed(error);
3036
3037                 libxfs_trans_ijoin(tp, ip, 0);
3038
3039                 libxfs_defer_init(&dfops, &first);
3040
3041                 error = -libxfs_dir_createname(tp, ip, &xfs_name_dotdot,
3042                                         ip->i_ino, &first, nres);
3043                 if (error)
3044                         do_error(
3045         _("can't make \"..\" entry in root inode %" PRIu64 ", createname error %d\n"), ino, error);
3046
3047                 libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
3048
3049                 libxfs_defer_ijoin(&dfops, ip);
3050                 error = -libxfs_defer_finish(&tp, &dfops);
3051                 ASSERT(error == 0);
3052                 libxfs_trans_commit(tp);
3053
3054                 need_root_dotdot = 0;
3055         } else if (need_root_dotdot && ino == mp->m_sb.sb_rootino)  {
3056                 do_warn(_("would recreate root directory .. entry\n"));
3057         }
3058
3059         /*
3060          * if we need to create the '.' entry, do so only if
3061          * the directory is a longform dir.  if it's been
3062          * turned into a shortform dir, then the inode is ok
3063          * since shortform dirs have no '.' entry and the inode
3064          * has already been committed by prune_lf_dir_entry().
3065          */
3066         if (need_dot)  {
3067                 /*
3068                  * bump up our link count but don't
3069                  * bump up the inode link count.  chances
3070                  * are good that even though we lost '.'
3071                  * the inode link counts reflect '.' so
3072                  * leave the inode link count alone and if
3073                  * it turns out to be wrong, we'll catch
3074                  * that in phase 7.
3075                  */
3076                 add_inode_ref(irec, ino_offset);
3077
3078                 if (no_modify)  {
3079                         do_warn(
3080         _("would create missing \".\" entry in dir ino %" PRIu64 "\n"),
3081                                 ino);
3082                 } else if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)  {
3083                         /*
3084                          * need to create . entry in longform dir.
3085                          */
3086                         do_warn(
3087         _("creating missing \".\" entry in dir ino %" PRIu64 "\n"), ino);
3088
3089                         nres = XFS_MKDIR_SPACE_RES(mp, 1);
3090                         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir,
3091                                                     nres, 0, 0, &tp);
3092                         if (error)
3093                                 res_failed(error);
3094
3095                         libxfs_trans_ijoin(tp, ip, 0);
3096
3097                         libxfs_defer_init(&dfops, &first);
3098
3099                         error = -libxfs_dir_createname(tp, ip, &xfs_name_dot,
3100                                         ip->i_ino, &first, nres);
3101                         if (error)
3102                                 do_error(
3103         _("can't make \".\" entry in dir ino %" PRIu64 ", createname error %d\n"),
3104                                         ino, error);
3105
3106                         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
3107
3108                         libxfs_defer_ijoin(&dfops, ip);
3109                         error = -libxfs_defer_finish(&tp, &dfops);
3110                         ASSERT(error == 0);
3111                         libxfs_trans_commit(tp);
3112                 }
3113         }
3114         IRELE(ip);
3115 }
3116
3117 /*
3118  * mark realtime bitmap and summary inodes as reached.
3119  * quota inode will be marked here as well
3120  */
3121 static void
3122 mark_standalone_inodes(xfs_mount_t *mp)
3123 {
3124         ino_tree_node_t         *irec;
3125         int                     offset;
3126
3127         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rbmino),
3128                         XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rbmino));
3129
3130         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rbmino) -
3131                         irec->ino_startnum;
3132
3133         add_inode_reached(irec, offset);
3134
3135         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rsumino),
3136                         XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rsumino));
3137
3138         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rsumino) -
3139                         irec->ino_startnum;
3140
3141         add_inode_reached(irec, offset);
3142
3143         if (fs_quotas)  {
3144                 if (mp->m_sb.sb_uquotino
3145                                 && mp->m_sb.sb_uquotino != NULLFSINO)  {
3146                         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
3147                                                 mp->m_sb.sb_uquotino),
3148                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino));
3149                         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino)
3150                                         - irec->ino_startnum;
3151                         add_inode_reached(irec, offset);
3152                 }
3153                 if (mp->m_sb.sb_gquotino
3154                                 && mp->m_sb.sb_gquotino != NULLFSINO)  {
3155                         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
3156                                                 mp->m_sb.sb_gquotino),
3157                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_gquotino));
3158                         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_gquotino)
3159                                         - irec->ino_startnum;
3160                         add_inode_reached(irec, offset);
3161                 }
3162                 if (mp->m_sb.sb_pquotino
3163                                 && mp->m_sb.sb_pquotino != NULLFSINO)  {
3164                         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
3165                                                 mp->m_sb.sb_pquotino),
3166                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino));
3167                         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino)
3168                                         - irec->ino_startnum;
3169                         add_inode_reached(irec, offset);
3170                 }
3171         }
3172 }
3173
3174 static void
3175 check_for_orphaned_inodes(
3176         xfs_mount_t             *mp,
3177         xfs_agnumber_t          agno,
3178         ino_tree_node_t         *irec)
3179 {
3180         int                     i;
3181         xfs_ino_t               ino;
3182
3183         for (i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
3184                 ASSERT(is_inode_confirmed(irec, i));
3185                 if (is_inode_free(irec, i))
3186                         continue;
3187
3188                 if (is_inode_reached(irec, i))
3189                         continue;
3190
3191                 ASSERT(inode_isadir(irec, i) ||
3192                         num_inode_references(irec, i) == 0);
3193
3194                 ino = XFS_AGINO_TO_INO(mp, agno, i + irec->ino_startnum);
3195                 if (inode_isadir(irec, i))
3196                         do_warn(_("disconnected dir inode %" PRIu64 ", "), ino);
3197                 else
3198                         do_warn(_("disconnected inode %" PRIu64 ", "), ino);
3199                 if (!no_modify)  {
3200                         if (!orphanage_ino)
3201                                 orphanage_ino = mk_orphanage(mp);
3202                         do_warn(_("moving to %s\n"), ORPHANAGE);
3203                         mv_orphanage(mp, ino, inode_isadir(irec, i));
3204                 } else  {
3205                         do_warn(_("would move to %s\n"), ORPHANAGE);
3206                 }
3207                 /*
3208                  * for read-only case, even though the inode isn't
3209                  * really reachable, set the flag (and bump our link
3210                  * count) anyway to fool phase 7
3211                  */
3212                 add_inode_reached(irec, i);
3213         }
3214 }
3215
3216 static void
3217 traverse_function(
3218         struct workqueue        *wq,
3219         xfs_agnumber_t          agno,
3220         void                    *arg)
3221 {
3222         ino_tree_node_t         *irec;
3223         int                     i;
3224         prefetch_args_t         *pf_args = arg;
3225
3226         wait_for_inode_prefetch(pf_args);
3227
3228         if (verbose)
3229                 do_log(_("        - agno = %d\n"), agno);
3230
3231         for (irec = findfirst_inode_rec(agno); irec; irec = next_ino_rec(irec)) {
3232                 if (irec->ino_isa_dir == 0)
3233                         continue;
3234
3235                 if (pf_args) {
3236                         sem_post(&pf_args->ra_count);
3237 #ifdef XR_PF_TRACE
3238                         sem_getvalue(&pf_args->ra_count, &i);
3239                         pftrace(
3240                 "processing inode chunk %p in AG %d (sem count = %d)",
3241                                 irec, agno, i);
3242 #endif
3243                 }
3244
3245                 for (i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
3246                         if (inode_isadir(irec, i))
3247                                 process_dir_inode(wq->wq_ctx, agno, irec, i);
3248                 }
3249         }
3250         cleanup_inode_prefetch(pf_args);
3251 }
3252
3253 static void
3254 update_missing_dotdot_entries(
3255         xfs_mount_t             *mp)
3256 {
3257         dotdot_update_t         *dir;
3258
3259         /*
3260          * these entries parents were updated, rebuild them again
3261          * set dotdot_update flag so processing routines do not count links
3262          */
3263         dotdot_update = 1;
3264         while (!list_empty(&dotdot_update_list)) {
3265                 dir = list_entry(dotdot_update_list.prev, struct dotdot_update,
3266                                  list);
3267                 list_del(&dir->list);
3268                 process_dir_inode(mp, dir->agno, dir->irec, dir->ino_offset);
3269                 free(dir);
3270         }
3271 }
3272
3273 static void
3274 traverse_ags(
3275         struct xfs_mount        *mp)
3276 {
3277         do_inode_prefetch(mp, 0, traverse_function, false, true);
3278 }
3279
3280 void
3281 phase6(xfs_mount_t *mp)
3282 {
3283         ino_tree_node_t         *irec;
3284         int                     i;
3285
3286         memset(&zerocr, 0, sizeof(struct cred));
3287         memset(&zerofsx, 0, sizeof(struct fsxattr));
3288         orphanage_ino = 0;
3289
3290         do_log(_("Phase 6 - check inode connectivity...\n"));
3291
3292         incore_ext_teardown(mp);
3293
3294         add_ino_ex_data(mp);
3295
3296         /*
3297          * verify existence of root directory - if we have to
3298          * make one, it's ok for the incore data structs not to
3299          * know about it since everything about it (and the other
3300          * inodes in its chunk if a new chunk was created) are ok
3301          */
3302         if (need_root_inode)  {
3303                 if (!no_modify)  {
3304                         do_warn(_("reinitializing root directory\n"));
3305                         mk_root_dir(mp);
3306                         need_root_inode = 0;
3307                         need_root_dotdot = 0;
3308                 } else  {
3309                         do_warn(_("would reinitialize root directory\n"));
3310                 }
3311         }
3312
3313         if (need_rbmino)  {
3314                 if (!no_modify)  {
3315                         do_warn(_("reinitializing realtime bitmap inode\n"));
3316                         mk_rbmino(mp);
3317                         need_rbmino = 0;
3318                 } else  {
3319                         do_warn(_("would reinitialize realtime bitmap inode\n"));
3320                 }
3321         }
3322
3323         if (need_rsumino)  {
3324                 if (!no_modify)  {
3325                         do_warn(_("reinitializing realtime summary inode\n"));
3326                         mk_rsumino(mp);
3327                         need_rsumino = 0;
3328                 } else  {
3329                         do_warn(_("would reinitialize realtime summary inode\n"));
3330                 }
3331         }
3332
3333         if (!no_modify)  {
3334                 do_log(
3335 _("        - resetting contents of realtime bitmap and summary inodes\n"));
3336                 if (fill_rbmino(mp))  {
3337                         do_warn(
3338                         _("Warning:  realtime bitmap may be inconsistent\n"));
3339                 }
3340
3341                 if (fill_rsumino(mp))  {
3342                         do_warn(
3343                         _("Warning:  realtime bitmap may be inconsistent\n"));
3344                 }
3345         }
3346
3347         mark_standalone_inodes(mp);
3348
3349         do_log(_("        - traversing filesystem ...\n"));
3350
3351         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
3352                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
3353
3354         /*
3355          * we always have a root inode, even if it's free...
3356          * if the root is free, forget it, lost+found is already gone
3357          */
3358         if (is_inode_free(irec, 0) || !inode_isadir(irec, 0))  {
3359                 need_root_inode = 1;
3360         }
3361
3362         /*
3363          * then process all inodes by walking incore inode tree
3364          */
3365         traverse_ags(mp);
3366
3367         /*
3368          * any directories that had updated ".." entries, rebuild them now
3369          */
3370         update_missing_dotdot_entries(mp);
3371
3372         do_log(_("        - traversal finished ...\n"));
3373         do_log(_("        - moving disconnected inodes to %s ...\n"),
3374                 ORPHANAGE);
3375
3376         /*
3377          * move all disconnected inodes to the orphanage
3378          */
3379         for (i = 0; i < glob_agcount; i++)  {
3380                 irec = findfirst_inode_rec(i);
3381                 while (irec != NULL)  {
3382                         check_for_orphaned_inodes(mp, i, irec);
3383                         irec = next_ino_rec(irec);
3384                 }
3385         }
3386 }