repair/phase6.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
   4  * All Rights Reserved.
   5  */
   6
   7 #include "libxfs.h"
   8 #include "threads.h"
   9 #include "prefetch.h"
  10 #include "avl.h"
  11 #include "globals.h"
  12 #include "agheader.h"
  13 #include "incore.h"
  14 #include "dir2.h"
  15 #include "protos.h"
  16 #include "err_protos.h"
  17 #include "dinode.h"
  18 #include "progress.h"
  19 #include "versions.h"
  20
  21 static struct cred              zerocr;
  22 static struct fsxattr           zerofsx;
  23 static xfs_ino_t                orphanage_ino;
  24
  25 static struct xfs_name          xfs_name_dot = {(unsigned char *)".",
  26                                                 1,
  27                                                 XFS_DIR3_FT_DIR};
  28
  29 /*
  30  * When we're checking directory inodes, we're allowed to set a directory's
  31  * dotdot entry to zero to signal that the parent needs to be reconnected
  32  * during phase 6.  If we're handling a shortform directory the ifork
  33  * verifiers will fail, so temporarily patch out this canary so that we can
  34  * verify the rest of the fork and move on to fixing the dir.
  35  */
  36 static xfs_failaddr_t
  37 phase6_verify_dir(
  38         struct xfs_inode                *ip)
  39 {
  40         struct xfs_mount                *mp = ip->i_mount;
  41         const struct xfs_dir_ops        *dops;
  42         struct xfs_ifork                *ifp;
  43         struct xfs_dir2_sf_hdr          *sfp;
  44         xfs_failaddr_t                  fa;
  45         xfs_ino_t                       old_parent;
  46         bool                            parent_bypass = false;
  47         int                             size;
  48
  49         dops = libxfs_dir_get_ops(mp, NULL);
  50
  51         ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
  52         sfp = (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data;
  53         size = ifp->if_bytes;
  54
  55         /*
  56          * If this is a shortform directory, phase4 may have set the parent
  57          * inode to zero to indicate that it must be fixed.  Temporarily
  58          * set a valid parent so that the directory verifier will pass.
  59          */
  60         if (size > offsetof(struct xfs_dir2_sf_hdr, parent) &&
  61             size >= xfs_dir2_sf_hdr_size(sfp->i8count)) {
  62                 old_parent = dops->sf_get_parent_ino(sfp);
  63                 if (old_parent == 0) {
  64                         dops->sf_put_parent_ino(sfp, mp->m_sb.sb_rootino);
  65                         parent_bypass = true;
  66                 }
  67         }
  68
  69         fa = libxfs_default_ifork_ops.verify_dir(ip);
  70
  71         /* Put it back. */
  72         if (parent_bypass)
  73                 dops->sf_put_parent_ino(sfp, old_parent);
  74
  75         return fa;
  76 }
  77
  78 static struct xfs_ifork_ops phase6_ifork_ops = {
  79         .verify_attr    = xfs_attr_shortform_verify,
  80         .verify_dir     = phase6_verify_dir,
  81         .verify_symlink = xfs_symlink_shortform_verify,
  82 };
  83
  84 /*
  85  * Data structures used to keep track of directories where the ".."
  86  * entries are updated. These must be rebuilt after the initial pass
  87  */
  88 typedef struct dotdot_update {
  89         struct list_head        list;
  90         ino_tree_node_t         *irec;
  91         xfs_agnumber_t          agno;
  92         int                     ino_offset;
  93 } dotdot_update_t;
  94
  95 static LIST_HEAD(dotdot_update_list);
  96 static int                      dotdot_update;
  97
  98 static void
  99 add_dotdot_update(
 100         xfs_agnumber_t          agno,
 101         ino_tree_node_t         *irec,
 102         int                     ino_offset)
 103 {
 104         dotdot_update_t         *dir = malloc(sizeof(dotdot_update_t));
 105
 106         if (!dir)
 107                 do_error(_("malloc failed add_dotdot_update (%zu bytes)\n"),
 108                         sizeof(dotdot_update_t));
 109
 110         INIT_LIST_HEAD(&dir->list);
 111         dir->irec = irec;
 112         dir->agno = agno;
 113         dir->ino_offset = ino_offset;
 114
 115         list_add(&dir->list, &dotdot_update_list);
 116 }
 117
 118 /*
 119  * Data structures and routines to keep track of directory entries
 120  * and whether their leaf entry has been seen. Also used for name
 121  * duplicate checking and rebuilding step if required.
 122  */
 123 typedef struct dir_hash_ent {
 124         struct dir_hash_ent     *nextbyaddr;    /* next in addr bucket */
 125         struct dir_hash_ent     *nextbyhash;    /* next in name bucket */
 126         struct dir_hash_ent     *nextbyorder;   /* next in order added */
 127         xfs_dahash_t            hashval;        /* hash value of name */
 128         uint32_t                address;        /* offset of data entry */
 129         xfs_ino_t               inum;           /* inode num of entry */
 130         short                   junkit;         /* name starts with / */
 131         short                   seen;           /* have seen leaf entry */
 132         struct xfs_name         name;
 133 } dir_hash_ent_t;
 134
 135 typedef struct dir_hash_tab {
 136         int                     size;           /* size of hash tables */
 137         int                     names_duped;    /* 1 = ent names malloced */
 138         dir_hash_ent_t          *first;         /* ptr to first added entry */
 139         dir_hash_ent_t          *last;          /* ptr to last added entry */
 140         dir_hash_ent_t          **byhash;       /* ptr to name hash buckets */
 141         dir_hash_ent_t          **byaddr;       /* ptr to addr hash buckets */
 142 } dir_hash_tab_t;
 143
 144 #define DIR_HASH_TAB_SIZE(n)    \
 145         (sizeof(dir_hash_tab_t) + (sizeof(dir_hash_ent_t *) * (n) * 2))
 146 #define DIR_HASH_FUNC(t,a)      ((a) % (t)->size)
 147
 148 /*
 149  * Track the contents of the freespace table in a directory.
 150  */
 151 typedef struct freetab {
 152         int                     naents; /* expected number of data blocks */
 153         int                     nents;  /* number of data blocks processed */
 154         struct freetab_ent {
 155                 xfs_dir2_data_off_t     v;
 156                 short                   s;
 157         } ents[1];
 158 } freetab_t;
 159 #define FREETAB_SIZE(n) \
 160         (offsetof(freetab_t, ents) + (sizeof(struct freetab_ent) * (n)))
 161
 162 #define DIR_HASH_CK_OK          0
 163 #define DIR_HASH_CK_DUPLEAF     1
 164 #define DIR_HASH_CK_BADHASH     2
 165 #define DIR_HASH_CK_NODATA      3
 166 #define DIR_HASH_CK_NOLEAF      4
 167 #define DIR_HASH_CK_BADSTALE    5
 168 #define DIR_HASH_CK_TOTAL       6
 169
 170 /*
 171  * Need to handle CRC and validation errors specially here. If there is a
 172  * validator error, re-read without the verifier so that we get a buffer we can
 173  * check and repair. Re-attach the ops to the buffer after the read so that when
 174  * it is rewritten the CRC is recalculated.
 175  *
 176  * If the buffer was not read, we return an error. If the buffer was read but
 177  * had a CRC or corruption error, we reread it without the verifier and if it is
 178  * read successfully we increment *crc_error and return 0. Otherwise we
 179  * return the read error.
 180  */
 181 static int
 182 dir_read_buf(
 183         struct xfs_inode        *ip,
 184         xfs_dablk_t             bno,
 185         xfs_daddr_t             mappedbno,
 186         struct xfs_buf          **bpp,
 187         const struct xfs_buf_ops *ops,
 188         int                     *crc_error)
 189 {
 190         int error;
 191         int error2;
 192
 193         error = -libxfs_da_read_buf(NULL, ip, bno, mappedbno, bpp,
 194                                    XFS_DATA_FORK, ops);
 195
 196         if (error != EFSBADCRC && error != EFSCORRUPTED)
 197                 return error;
 198
 199         error2 = -libxfs_da_read_buf(NULL, ip, bno, mappedbno, bpp,
 200                                    XFS_DATA_FORK, NULL);
 201         if (error2)
 202                 return error2;
 203
 204         (*crc_error)++;
 205         (*bpp)->b_ops = ops;
 206         return 0;
 207 }
 208
 209 /*
 210  * Returns 0 if the name already exists (ie. a duplicate)
 211  */
 212 static int
 213 dir_hash_add(
 214         xfs_mount_t             *mp,
 215         dir_hash_tab_t          *hashtab,
 216         uint32_t                addr,
 217         xfs_ino_t               inum,
 218         int                     namelen,
 219         unsigned char           *name,
 220         uint8_t                 ftype)
 221 {
 222         xfs_dahash_t            hash = 0;
 223         int                     byaddr;
 224         int                     byhash = 0;
 225         dir_hash_ent_t          *p;
 226         int                     dup;
 227         short                   junk;
 228         struct xfs_name         xname;
 229
 230         ASSERT(!hashtab->names_duped);
 231
 232         xname.name = name;
 233         xname.len = namelen;
 234         xname.type = ftype;
 235
 236         junk = name[0] == '/';
 237         byaddr = DIR_HASH_FUNC(hashtab, addr);
 238         dup = 0;
 239
 240         if (!junk) {
 241                 hash = mp->m_dirnameops->hashname(&xname);
 242                 byhash = DIR_HASH_FUNC(hashtab, hash);
 243
 244                 /*
 245                  * search hash bucket for existing name.
 246                  */
 247                 for (p = hashtab->byhash[byhash]; p; p = p->nextbyhash) {
 248                         if (p->hashval == hash && p->name.len == namelen) {
 249                                 if (memcmp(p->name.name, name, namelen) == 0) {
 250                                         dup = 1;
 251                                         junk = 1;
 252                                         break;
 253                                 }
 254                         }
 255                 }
 256         }
 257
 258         if ((p = malloc(sizeof(*p))) == NULL)
 259                 do_error(_("malloc failed in dir_hash_add (%zu bytes)\n"),
 260                         sizeof(*p));
 261
 262         p->nextbyaddr = hashtab->byaddr[byaddr];
 263         hashtab->byaddr[byaddr] = p;
 264         if (hashtab->last)
 265                 hashtab->last->nextbyorder = p;
 266         else
 267                 hashtab->first = p;
 268         p->nextbyorder = NULL;
 269         hashtab->last = p;
 270
 271         if (!(p->junkit = junk)) {
 272                 p->hashval = hash;
 273                 p->nextbyhash = hashtab->byhash[byhash];
 274                 hashtab->byhash[byhash] = p;
 275         }
 276         p->address = addr;
 277         p->inum = inum;
 278         p->seen = 0;
 279         p->name = xname;
 280
 281         return !dup;
 282 }
 283
 284 /*
 285  * checks to see if any data entries are not in the leaf blocks
 286  */
 287 static int
 288 dir_hash_unseen(
 289         dir_hash_tab_t  *hashtab)
 290 {
 291         int             i;
 292         dir_hash_ent_t  *p;
 293
 294         for (i = 0; i < hashtab->size; i++) {
 295                 for (p = hashtab->byaddr[i]; p; p = p->nextbyaddr) {
 296                         if (p->seen == 0)
 297                                 return 1;
 298                 }
 299         }
 300         return 0;
 301 }
 302
 303 static int
 304 dir_hash_check(
 305         dir_hash_tab_t  *hashtab,
 306         xfs_inode_t     *ip,
 307         int             seeval)
 308 {
 309         static char     *seevalstr[DIR_HASH_CK_TOTAL];
 310         static int      done;
 311
 312         if (!done) {
 313                 seevalstr[DIR_HASH_CK_OK] = _("ok");
 314                 seevalstr[DIR_HASH_CK_DUPLEAF] = _("duplicate leaf");
 315                 seevalstr[DIR_HASH_CK_BADHASH] = _("hash value mismatch");
 316                 seevalstr[DIR_HASH_CK_NODATA] = _("no data entry");
 317                 seevalstr[DIR_HASH_CK_NOLEAF] = _("no leaf entry");
 318                 seevalstr[DIR_HASH_CK_BADSTALE] = _("bad stale count");
 319                 done = 1;
 320         }
 321
 322         if (seeval == DIR_HASH_CK_OK && dir_hash_unseen(hashtab))
 323                 seeval = DIR_HASH_CK_NOLEAF;
 324         if (seeval == DIR_HASH_CK_OK)
 325                 return 0;
 326         do_warn(_("bad hash table for directory inode %" PRIu64 " (%s): "),
 327                 ip->i_ino, seevalstr[seeval]);
 328         if (!no_modify)
 329                 do_warn(_("rebuilding\n"));
 330         else
 331                 do_warn(_("would rebuild\n"));
 332         return 1;
 333 }
 334
 335 static void
 336 dir_hash_done(
 337         dir_hash_tab_t  *hashtab)
 338 {
 339         int             i;
 340         dir_hash_ent_t  *n;
 341         dir_hash_ent_t  *p;
 342
 343         for (i = 0; i < hashtab->size; i++) {
 344                 for (p = hashtab->byaddr[i]; p; p = n) {
 345                         n = p->nextbyaddr;
 346                         if (hashtab->names_duped)
 347                                 free((void *)p->name.name);
 348                         free(p);
 349                 }
 350         }
 351         free(hashtab);
 352 }
 353
 354 static dir_hash_tab_t *
 355 dir_hash_init(
 356         xfs_fsize_t     size)
 357 {
 358         dir_hash_tab_t  *hashtab;
 359         int             hsize;
 360
 361         hsize = size / (16 * 4);
 362         if (hsize > 65536)
 363                 hsize = 63336;
 364         else if (hsize < 16)
 365                 hsize = 16;
 366         if ((hashtab = calloc(DIR_HASH_TAB_SIZE(hsize), 1)) == NULL)
 367                 do_error(_("calloc failed in dir_hash_init\n"));
 368         hashtab->size = hsize;
 369         hashtab->byhash = (dir_hash_ent_t**)((char *)hashtab +
 370                 sizeof(dir_hash_tab_t));
 371         hashtab->byaddr = (dir_hash_ent_t**)((char *)hashtab +
 372                 sizeof(dir_hash_tab_t) + sizeof(dir_hash_ent_t*) * hsize);
 373         return hashtab;
 374 }
 375
 376 static int
 377 dir_hash_see(
 378         dir_hash_tab_t          *hashtab,
 379         xfs_dahash_t            hash,
 380         xfs_dir2_dataptr_t      addr)
 381 {
 382         int                     i;
 383         dir_hash_ent_t          *p;
 384
 385         i = DIR_HASH_FUNC(hashtab, addr);
 386         for (p = hashtab->byaddr[i]; p; p = p->nextbyaddr) {
 387                 if (p->address != addr)
 388                         continue;
 389                 if (p->seen)
 390                         return DIR_HASH_CK_DUPLEAF;
 391                 if (p->junkit == 0 && p->hashval != hash)
 392                         return DIR_HASH_CK_BADHASH;
 393                 p->seen = 1;
 394                 return DIR_HASH_CK_OK;
 395         }
 396         return DIR_HASH_CK_NODATA;
 397 }
 398
 399 static void
 400 dir_hash_update_ftype(
 401         dir_hash_tab_t          *hashtab,
 402         xfs_dir2_dataptr_t      addr,
 403         uint8_t                 ftype)
 404 {
 405         int                     i;
 406         dir_hash_ent_t          *p;
 407
 408         i = DIR_HASH_FUNC(hashtab, addr);
 409         for (p = hashtab->byaddr[i]; p; p = p->nextbyaddr) {
 410                 if (p->address != addr)
 411                         continue;
 412                 p->name.type = ftype;
 413         }
 414 }
 415
 416 /*
 417  * checks to make sure leafs match a data entry, and that the stale
 418  * count is valid.
 419  */
 420 static int
 421 dir_hash_see_all(
 422         dir_hash_tab_t          *hashtab,
 423         xfs_dir2_leaf_entry_t   *ents,
 424         int                     count,
 425         int                     stale)
 426 {
 427         int                     i;
 428         int                     j;
 429         int                     rval;
 430
 431         for (i = j = 0; i < count; i++) {
 432                 if (be32_to_cpu(ents[i].address) == XFS_DIR2_NULL_DATAPTR) {
 433                         j++;
 434                         continue;
 435                 }
 436                 rval = dir_hash_see(hashtab, be32_to_cpu(ents[i].hashval),
 437                                         be32_to_cpu(ents[i].address));
 438                 if (rval != DIR_HASH_CK_OK)
 439                         return rval;
 440         }
 441         return j == stale ? DIR_HASH_CK_OK : DIR_HASH_CK_BADSTALE;
 442 }
 443
 444 /*
 445  * Convert name pointers into locally allocated memory.
 446  * This must only be done after all the entries have been added.
 447  */
 448 static void
 449 dir_hash_dup_names(dir_hash_tab_t *hashtab)
 450 {
 451         unsigned char           *name;
 452         dir_hash_ent_t          *p;
 453
 454         if (hashtab->names_duped)
 455                 return;
 456
 457         for (p = hashtab->first; p; p = p->nextbyorder) {
 458                 name = malloc(p->name.len);
 459                 memcpy(name, p->name.name, p->name.len);
 460                 p->name.name = name;
 461         }
 462         hashtab->names_duped = 1;
 463 }
 464
 465 /*
 466  * Given a block number in a fork, return the next valid block number
 467  * (not a hole).
 468  * If this is the last block number then NULLFILEOFF is returned.
 469  *
 470  * This was originally in the kernel, but only used in xfs_repair.
 471  */
 472 static int
 473 bmap_next_offset(
 474         xfs_trans_t     *tp,                    /* transaction pointer */
 475         xfs_inode_t     *ip,                    /* incore inode */
 476         xfs_fileoff_t   *bnop,                  /* current block */
 477         int             whichfork)              /* data or attr fork */
 478 {
 479         xfs_fileoff_t   bno;                    /* current block */
 480         int             error;                  /* error return value */
 481         xfs_bmbt_irec_t got;                    /* current extent value */
 482         xfs_ifork_t     *ifp;                   /* inode fork pointer */
 483         struct xfs_iext_cursor  icur;
 484
 485         if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
 486             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
 487             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
 488                return EIO;
 489         if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
 490                 *bnop = NULLFILEOFF;
 491                 return 0;
 492         }
 493         ifp = XFS_IFORK_PTR(ip, whichfork);
 494         if (!(ifp->if_flags & XFS_IFEXTENTS) &&
 495             (error = -libxfs_iread_extents(tp, ip, whichfork)))
 496                 return error;
 497         bno = *bnop + 1;
 498         if (!libxfs_iext_lookup_extent(ip, ifp, bno, &icur, &got))
 499                 *bnop = NULLFILEOFF;
 500         else
 501                 *bnop = got.br_startoff < bno ? bno : got.br_startoff;
 502         return 0;
 503 }
 504
 505
 506 static void
 507 res_failed(
 508         int     err)
 509 {
 510         if (err == ENOSPC) {
 511                 do_error(_("ran out of disk space!\n"));
 512         } else
 513                 do_error(_("xfs_trans_reserve returned %d\n"), err);
 514 }
 515
 516 void
 517 mk_rbmino(xfs_mount_t *mp)
 518 {
 519         xfs_trans_t     *tp;
 520         xfs_inode_t     *ip;
 521         xfs_bmbt_irec_t *ep;
 522         xfs_fsblock_t   first;
 523         int             i;
 524         int             nmap;
 525         int             error;
 526         struct xfs_defer_ops    dfops;
 527         xfs_fileoff_t   bno;
 528         xfs_bmbt_irec_t map[XFS_BMAP_MAX_NMAP];
 529         int             vers;
 530         int             times;
 531         uint            blocks;
 532
 533         /*
 534          * first set up inode
 535          */
 536         i = -libxfs_trans_alloc_rollable(mp, 10, &tp);
 537         if (i)
 538                 res_failed(i);
 539
 540         error = -libxfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, 0, &ip);
 541         if (error) {
 542                 do_error(
 543                 _("couldn't iget realtime bitmap inode -- error - %d\n"),
 544                         error);
 545         }
 546
 547         vers = xfs_sb_version_hascrc(&mp->m_sb) ? 3 : 2;
 548         memset(&ip->i_d, 0, sizeof(ip->i_d));
 549
 550         VFS_I(ip)->i_mode = S_IFREG;
 551         ip->i_d.di_version = vers;
 552         ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
 553         ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
 554
 555         set_nlink(VFS_I(ip), 1);        /* account for sb ptr */
 556
 557         times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
 558         if (ip->i_d.di_version == 3) {
 559                 VFS_I(ip)->i_version = 1;
 560                 ip->i_d.di_flags2 = 0;
 561                 times |= XFS_ICHGTIME_CREATE;
 562         }
 563         libxfs_trans_ichgtime(tp, ip, times);
 564
 565         /*
 566          * now the ifork
 567          */
 568         ip->i_df.if_flags = XFS_IFEXTENTS;
 569         ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
 570         ip->i_df.if_u1.if_root = NULL;
 571
 572         ip->i_d.di_size = mp->m_sb.sb_rbmblocks * mp->m_sb.sb_blocksize;
 573
 574         /*
 575          * commit changes
 576          */
 577         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 578         libxfs_trans_commit(tp);
 579
 580         /*
 581          * then allocate blocks for file and fill with zeroes (stolen
 582          * from mkfs)
 583          */
 584         blocks = mp->m_sb.sb_rbmblocks +
 585                         XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1;
 586         error = -libxfs_trans_alloc_rollable(mp, blocks, &tp);
 587         if (error)
 588                 res_failed(error);
 589
 590         libxfs_trans_ijoin(tp, ip, 0);
 591         bno = 0;
 592         libxfs_defer_init(&dfops, &first);
 593         while (bno < mp->m_sb.sb_rbmblocks) {
 594                 nmap = XFS_BMAP_MAX_NMAP;
 595                 error = -libxfs_bmapi_write(tp, ip, bno,
 596                           (xfs_extlen_t)(mp->m_sb.sb_rbmblocks - bno),
 597                           0, &first, mp->m_sb.sb_rbmblocks,
 598                           map, &nmap, &dfops);
 599                 if (error) {
 600                         do_error(
 601                         _("couldn't allocate realtime bitmap, error = %d\n"),
 602                                 error);
 603                 }
 604                 for (i = 0, ep = map; i < nmap; i++, ep++) {
 605                         libxfs_device_zero(mp->m_ddev_targp,
 606                                 XFS_FSB_TO_DADDR(mp, ep->br_startblock),
 607                                 XFS_FSB_TO_BB(mp, ep->br_blockcount));
 608                         bno += ep->br_blockcount;
 609                 }
 610         }
 611         libxfs_defer_ijoin(&dfops, ip);
 612         error = -libxfs_defer_finish(&tp, &dfops);
 613         if (error) {
 614                 do_error(
 615                 _("allocation of the realtime bitmap failed, error = %d\n"),
 616                         error);
 617         }
 618         libxfs_trans_commit(tp);
 619         IRELE(ip);
 620 }
 621
 622 static int
 623 fill_rbmino(xfs_mount_t *mp)
 624 {
 625         xfs_buf_t       *bp;
 626         xfs_trans_t     *tp;
 627         xfs_inode_t     *ip;
 628         xfs_rtword_t    *bmp;
 629         xfs_fsblock_t   first;
 630         int             nmap;
 631         int             error;
 632         xfs_fileoff_t   bno;
 633         xfs_bmbt_irec_t map;
 634
 635         bmp = btmcompute;
 636         bno = 0;
 637
 638         error = -libxfs_trans_alloc_rollable(mp, 10, &tp);
 639         if (error)
 640                 res_failed(error);
 641
 642         error = -libxfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, 0, &ip);
 643         if (error) {
 644                 do_error(
 645                 _("couldn't iget realtime bitmap inode -- error - %d\n"),
 646                         error);
 647         }
 648
 649         first = NULLFSBLOCK;
 650         while (bno < mp->m_sb.sb_rbmblocks)  {
 651                 /*
 652                  * fill the file one block at a time
 653                  */
 654                 nmap = 1;
 655                 error = -libxfs_bmapi_write(tp, ip, bno, 1, 0,
 656                                         &first, 1, &map, &nmap, NULL);
 657                 if (error || nmap != 1) {
 658                         do_error(
 659         _("couldn't map realtime bitmap block %" PRIu64 ", error = %d\n"),
 660                                 bno, error);
 661                 }
 662
 663                 ASSERT(map.br_startblock != HOLESTARTBLOCK);
 664
 665                 error = -libxfs_trans_read_buf(
 666                                 mp, tp, mp->m_dev,
 667                                 XFS_FSB_TO_DADDR(mp, map.br_startblock),
 668                                 XFS_FSB_TO_BB(mp, 1), 1, &bp, NULL);
 669
 670                 if (error) {
 671                         do_warn(
 672 _("can't access block %" PRIu64 " (fsbno %" PRIu64 ") of realtime bitmap inode %" PRIu64 "\n"),
 673                                 bno, map.br_startblock, mp->m_sb.sb_rbmino);
 674                         return(1);
 675                 }
 676
 677                 memmove(bp->b_addr, bmp, mp->m_sb.sb_blocksize);
 678
 679                 libxfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
 680
 681                 bmp = (xfs_rtword_t *)((intptr_t) bmp + mp->m_sb.sb_blocksize);
 682                 bno++;
 683         }
 684
 685         libxfs_trans_commit(tp);
 686         IRELE(ip);
 687         return(0);
 688 }
 689
 690 static int
 691 fill_rsumino(xfs_mount_t *mp)
 692 {
 693         xfs_buf_t       *bp;
 694         xfs_trans_t     *tp;
 695         xfs_inode_t     *ip;
 696         xfs_suminfo_t   *smp;
 697         xfs_fsblock_t   first;
 698         int             nmap;
 699         int             error;
 700         xfs_fileoff_t   bno;
 701         xfs_fileoff_t   end_bno;
 702         xfs_bmbt_irec_t map;
 703
 704         smp = sumcompute;
 705         bno = 0;
 706         end_bno = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
 707
 708         error = -libxfs_trans_alloc_rollable(mp, 10, &tp);
 709         if (error)
 710                 res_failed(error);
 711
 712         error = -libxfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, 0, &ip);
 713         if (error) {
 714                 do_error(
 715                 _("couldn't iget realtime summary inode -- error - %d\n"),
 716                         error);
 717         }
 718
 719         first = NULLFSBLOCK;
 720         while (bno < end_bno)  {
 721                 /*
 722                  * fill the file one block at a time
 723                  */
 724                 nmap = 1;
 725                 error = -libxfs_bmapi_write(tp, ip, bno, 1, 0,
 726                                         &first, 1, &map, &nmap, NULL);
 727                 if (error || nmap != 1) {
 728                         do_error(
 729         _("couldn't map realtime summary inode block %" PRIu64 ", error = %d\n"),
 730                                 bno, error);
 731                 }
 732
 733                 ASSERT(map.br_startblock != HOLESTARTBLOCK);
 734
 735                 error = -libxfs_trans_read_buf(
 736                                 mp, tp, mp->m_dev,
 737                                 XFS_FSB_TO_DADDR(mp, map.br_startblock),
 738                                 XFS_FSB_TO_BB(mp, 1), 1, &bp, NULL);
 739
 740                 if (error) {
 741                         do_warn(
 742 _("can't access block %" PRIu64 " (fsbno %" PRIu64 ") of realtime summary inode %" PRIu64 "\n"),
 743                                 bno, map.br_startblock, mp->m_sb.sb_rsumino);
 744                         IRELE(ip);
 745                         return(1);
 746                 }
 747
 748                 memmove(bp->b_addr, smp, mp->m_sb.sb_blocksize);
 749
 750                 libxfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
 751
 752                 smp = (xfs_suminfo_t *)((intptr_t)smp + mp->m_sb.sb_blocksize);
 753                 bno++;
 754         }
 755
 756         libxfs_trans_commit(tp);
 757         IRELE(ip);
 758         return(0);
 759 }
 760
 761 static void
 762 mk_rsumino(xfs_mount_t *mp)
 763 {
 764         xfs_trans_t     *tp;
 765         xfs_inode_t     *ip;
 766         xfs_bmbt_irec_t *ep;
 767         xfs_fsblock_t   first;
 768         int             i;
 769         int             nmap;
 770         int             error;
 771         int             nsumblocks;
 772         struct xfs_defer_ops    dfops;
 773         xfs_fileoff_t   bno;
 774         xfs_bmbt_irec_t map[XFS_BMAP_MAX_NMAP];
 775         int             vers;
 776         int             times;
 777         uint            blocks;
 778
 779         /*
 780          * first set up inode
 781          */
 782         i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 10, 0, 0, &tp);
 783         if (i)
 784                 res_failed(i);
 785
 786         error = -libxfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, 0, &ip);
 787         if (error) {
 788                 do_error(
 789                 _("couldn't iget realtime summary inode -- error - %d\n"),
 790                         error);
 791         }
 792
 793         vers = xfs_sb_version_hascrc(&mp->m_sb) ? 3 : 2;
 794         memset(&ip->i_d, 0, sizeof(ip->i_d));
 795
 796         VFS_I(ip)->i_mode = S_IFREG;
 797         ip->i_d.di_version = vers;
 798         ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
 799         ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
 800
 801         set_nlink(VFS_I(ip), 1);        /* account for sb ptr */
 802
 803         times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
 804         if (ip->i_d.di_version == 3) {
 805                 VFS_I(ip)->i_version = 1;
 806                 ip->i_d.di_flags2 = 0;
 807                 times |= XFS_ICHGTIME_CREATE;
 808         }
 809         libxfs_trans_ichgtime(tp, ip, times);
 810
 811         /*
 812          * now the ifork
 813          */
 814         ip->i_df.if_flags = XFS_IFEXTENTS;
 815         ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
 816         ip->i_df.if_u1.if_root = NULL;
 817
 818         ip->i_d.di_size = mp->m_rsumsize;
 819
 820         /*
 821          * commit changes
 822          */
 823         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 824         libxfs_trans_commit(tp);
 825
 826         /*
 827          * then allocate blocks for file and fill with zeroes (stolen
 828          * from mkfs)
 829          */
 830         libxfs_defer_init(&dfops, &first);
 831
 832         nsumblocks = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
 833         blocks = nsumblocks + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1;
 834         error = -libxfs_trans_alloc_rollable(mp, blocks, &tp);
 835         if (error)
 836                 res_failed(error);
 837
 838         libxfs_trans_ijoin(tp, ip, 0);
 839         bno = 0;
 840         libxfs_defer_init(&dfops, &first);
 841         while (bno < nsumblocks) {
 842                 nmap = XFS_BMAP_MAX_NMAP;
 843                 error = -libxfs_bmapi_write(tp, ip, bno,
 844                           (xfs_extlen_t)(nsumblocks - bno),
 845                           0, &first, nsumblocks, map, &nmap, &dfops);
 846                 if (error) {
 847                         do_error(
 848                 _("couldn't allocate realtime summary inode, error = %d\n"),
 849                                 error);
 850                 }
 851                 for (i = 0, ep = map; i < nmap; i++, ep++) {
 852                         libxfs_device_zero(mp->m_ddev_targp,
 853                                       XFS_FSB_TO_DADDR(mp, ep->br_startblock),
 854                                       XFS_FSB_TO_BB(mp, ep->br_blockcount));
 855                         bno += ep->br_blockcount;
 856                 }
 857         }
 858         libxfs_defer_ijoin(&dfops, ip);
 859         error = -libxfs_defer_finish(&tp, &dfops);
 860         if (error) {
 861                 do_error(
 862         _("allocation of the realtime summary ino failed, error = %d\n"),
 863                         error);
 864         }
 865         libxfs_trans_commit(tp);
 866         IRELE(ip);
 867 }
 868
 869 /*
 870  * makes a new root directory.
 871  */
 872 static void
 873 mk_root_dir(xfs_mount_t *mp)
 874 {
 875         xfs_trans_t     *tp;
 876         xfs_inode_t     *ip;
 877         int             i;
 878         int             error;
 879         const mode_t    mode = 0755;
 880         ino_tree_node_t *irec;
 881         int             vers;
 882         int             times;
 883
 884         ip = NULL;
 885         i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 10, 0, 0, &tp);
 886         if (i)
 887                 res_failed(i);
 888
 889         error = -libxfs_trans_iget(mp, tp, mp->m_sb.sb_rootino, 0, 0, &ip);
 890         if (error) {
 891                 do_error(_("could not iget root inode -- error - %d\n"), error);
 892         }
 893
 894         /*
 895          * take care of the core -- initialization from xfs_ialloc()
 896          */
 897         vers = xfs_sb_version_hascrc(&mp->m_sb) ? 3 : 2;
 898         memset(&ip->i_d, 0, sizeof(ip->i_d));
 899
 900         VFS_I(ip)->i_mode = mode|S_IFDIR;
 901         ip->i_d.di_version = vers;
 902         ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
 903         ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
 904
 905         set_nlink(VFS_I(ip), 1);        /* account for . */
 906
 907         times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
 908         if (ip->i_d.di_version == 3) {
 909                 VFS_I(ip)->i_version = 1;
 910                 ip->i_d.di_flags2 = 0;
 911                 times |= XFS_ICHGTIME_CREATE;
 912         }
 913         libxfs_trans_ichgtime(tp, ip, times);
 914
 915         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 916
 917         /*
 918          * now the ifork
 919          */
 920         ip->i_df.if_flags = XFS_IFEXTENTS;
 921         ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
 922         ip->i_df.if_u1.if_root = NULL;
 923
 924
 925
 926         /*
 927          * initialize the directory
 928          */
 929         ip->d_ops = mp->m_dir_inode_ops;
 930         libxfs_dir_init(tp, ip, ip);
 931
 932         libxfs_trans_commit(tp);
 933         IRELE(ip);
 934
 935         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
 936                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
 937         set_inode_isadir(irec, XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino) -
 938                                 irec->ino_startnum);
 939 }
 940
 941 /*
 942  * orphanage name == lost+found
 943  */
 944 static xfs_ino_t
 945 mk_orphanage(xfs_mount_t *mp)
 946 {
 947         xfs_ino_t       ino;
 948         xfs_trans_t     *tp;
 949         xfs_inode_t     *ip;
 950         xfs_inode_t     *pip;
 951         xfs_fsblock_t   first;
 952         ino_tree_node_t *irec;
 953         int             ino_offset = 0;
 954         int             i;
 955         int             error;
 956         struct xfs_defer_ops    dfops;
 957         const int       mode = 0755;
 958         int             nres;
 959         struct xfs_name xname;
 960
 961         /*
 962          * check for an existing lost+found first, if it exists, return
 963          * its inode. Otherwise, we can create it. Bad lost+found inodes
 964          * would have been cleared in phase3 and phase4.
 965          */
 966
 967         i = -libxfs_iget(mp, NULL, mp->m_sb.sb_rootino, 0, &pip,
 968                         &xfs_default_ifork_ops);
 969         if (i)
 970                 do_error(_("%d - couldn't iget root inode to obtain %s\n"),
 971                         i, ORPHANAGE);
 972
 973         xname.name = (unsigned char *)ORPHANAGE;
 974         xname.len = strlen(ORPHANAGE);
 975         xname.type = XFS_DIR3_FT_DIR;
 976
 977         if (libxfs_dir_lookup(NULL, pip, &xname, &ino, NULL) == 0)
 978                 return ino;
 979
 980         /*
 981          * could not be found, create it
 982          */
 983         libxfs_defer_init(&dfops, &first);
 984         nres = XFS_MKDIR_SPACE_RES(mp, xname.len);
 985         i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir, nres, 0, 0, &tp);
 986         if (i)
 987                 res_failed(i);
 988
 989         /*
 990          * use iget/ijoin instead of trans_iget because the ialloc
 991          * wrapper can commit the transaction and start a new one
 992          */
 993 /*      i = -libxfs_iget(mp, NULL, mp->m_sb.sb_rootino, 0, &pip,
 994                         &xfs_default_ifork_ops);
 995         if (i)
 996                 do_error(_("%d - couldn't iget root inode to make %s\n"),
 997                         i, ORPHANAGE);*/
 998
 999         error = -libxfs_inode_alloc(&tp, pip, mode|S_IFDIR,
1000                                         1, 0, &zerocr, &zerofsx, &ip);
1001         if (error) {
1002                 do_error(_("%s inode allocation failed %d\n"),
1003                         ORPHANAGE, error);
1004         }
1005         inc_nlink(VFS_I(ip));           /* account for . */
1006         ino = ip->i_ino;
1007
1008         irec = find_inode_rec(mp,
1009                         XFS_INO_TO_AGNO(mp, ino),
1010                         XFS_INO_TO_AGINO(mp, ino));
1011
1012         if (irec == NULL) {
1013                 /*
1014                  * This inode is allocated from a newly created inode
1015                  * chunk and therefore did not exist when inode chunks
1016                  * were processed in phase3. Add this group of inodes to
1017                  * the entry avl tree as if they were discovered in phase3.
1018                  */
1019                 irec = set_inode_free_alloc(mp, XFS_INO_TO_AGNO(mp, ino),
1020                                             XFS_INO_TO_AGINO(mp, ino));
1021                 alloc_ex_data(irec);
1022
1023                 for (i = 0; i < XFS_INODES_PER_CHUNK; i++)
1024                         set_inode_free(irec, i);
1025         }
1026
1027         ino_offset = get_inode_offset(mp, ino, irec);
1028
1029         /*
1030          * Mark the inode allocated to lost+found as used in the AVL tree
1031          * so it is not skipped in phase 7
1032          */
1033         set_inode_used(irec, ino_offset);
1034         add_inode_ref(irec, ino_offset);
1035
1036         /*
1037          * now that we know the transaction will stay around,
1038          * add the root inode to it
1039          */
1040         libxfs_trans_ijoin(tp, pip, 0);
1041
1042         /*
1043          * create the actual entry
1044          */
1045         error = -libxfs_dir_createname(tp, pip, &xname, ip->i_ino, &first,
1046                                         &dfops, nres);
1047         if (error)
1048                 do_error(
1049                 _("can't make %s, createname error %d\n"),
1050                         ORPHANAGE, error);
1051
1052         /*
1053          * bump up the link count in the root directory to account
1054          * for .. in the new directory
1055          */
1056         inc_nlink(VFS_I(pip));
1057         add_inode_ref(find_inode_rec(mp,
1058                                 XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
1059                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino)), 0);
1060
1061
1062
1063         libxfs_trans_log_inode(tp, pip, XFS_ILOG_CORE);
1064         libxfs_dir_init(tp, ip, pip);
1065         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1066
1067         libxfs_defer_ijoin(&dfops, ip);
1068         error = -libxfs_defer_finish(&tp, &dfops);
1069         if (error) {
1070                 do_error(_("%s directory creation failed -- bmapf error %d\n"),
1071                         ORPHANAGE, error);
1072         }
1073
1074
1075         libxfs_trans_commit(tp);
1076         IRELE(ip);
1077         IRELE(pip);
1078         add_inode_reached(irec,ino_offset);
1079
1080         return(ino);
1081 }
1082
1083 /*
1084  * move a file to the orphange.
1085  */
1086 static void
1087 mv_orphanage(
1088         xfs_mount_t             *mp,
1089         xfs_ino_t               ino,            /* inode # to be moved */
1090         int                     isa_dir)        /* 1 if inode is a directory */
1091 {
1092         xfs_inode_t             *orphanage_ip;
1093         xfs_ino_t               entry_ino_num;
1094         xfs_inode_t             *ino_p;
1095         xfs_trans_t             *tp;
1096         xfs_fsblock_t           first;
1097         struct xfs_defer_ops            dfops;
1098         int                     err;
1099         unsigned char           fname[MAXPATHLEN + 1];
1100         int                     nres;
1101         int                     incr;
1102         ino_tree_node_t         *irec;
1103         int                     ino_offset = 0;
1104         struct xfs_name         xname;
1105
1106         xname.name = fname;
1107         xname.len = snprintf((char *)fname, sizeof(fname), "%llu",
1108                                 (unsigned long long)ino);
1109
1110         err = -libxfs_iget(mp, NULL, orphanage_ino, 0, &orphanage_ip,
1111                         &xfs_default_ifork_ops);
1112         if (err)
1113                 do_error(_("%d - couldn't iget orphanage inode\n"), err);
1114         /*
1115          * Make sure the filename is unique in the lost+found
1116          */
1117         incr = 0;
1118         while (libxfs_dir_lookup(NULL, orphanage_ip, &xname, &entry_ino_num,
1119                                                                 NULL) == 0)
1120                 xname.len = snprintf((char *)fname, sizeof(fname), "%llu.%d",
1121                                         (unsigned long long)ino, ++incr);
1122
1123         /* Orphans may not have a proper parent, so use custom ops here */
1124         err = -libxfs_iget(mp, NULL, ino, 0, &ino_p, &phase6_ifork_ops);
1125         if (err)
1126                 do_error(_("%d - couldn't iget disconnected inode\n"), err);
1127
1128         xname.type = libxfs_mode_to_ftype(VFS_I(ino_p)->i_mode);
1129
1130         if (isa_dir)  {
1131                 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, orphanage_ino),
1132                                 XFS_INO_TO_AGINO(mp, orphanage_ino));
1133                 if (irec)
1134                         ino_offset = XFS_INO_TO_AGINO(mp, orphanage_ino) -
1135                                         irec->ino_startnum;
1136                 nres = XFS_DIRENTER_SPACE_RES(mp, fnamelen) +
1137                        XFS_DIRENTER_SPACE_RES(mp, 2);
1138                 err = -libxfs_dir_lookup(NULL, ino_p, &xfs_name_dotdot,
1139                                         &entry_ino_num, NULL);
1140                 if (err) {
1141                         ASSERT(err == ENOENT);
1142
1143                         err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_rename,
1144                                                   nres, 0, 0, &tp);
1145                         if (err)
1146                                 do_error(
1147         _("space reservation failed (%d), filesystem may be out of space\n"),
1148                                         err);
1149
1150                         libxfs_trans_ijoin(tp, orphanage_ip, 0);
1151                         libxfs_trans_ijoin(tp, ino_p, 0);
1152
1153                         libxfs_defer_init(&dfops, &first);
1154                         err = -libxfs_dir_createname(tp, orphanage_ip, &xname,
1155                                                 ino, &first, &dfops, nres);
1156                         if (err)
1157                                 do_error(
1158         _("name create failed in %s (%d), filesystem may be out of space\n"),
1159                                         ORPHANAGE, err);
1160
1161                         if (irec)
1162                                 add_inode_ref(irec, ino_offset);
1163                         else
1164                                 inc_nlink(VFS_I(orphanage_ip));
1165                         libxfs_trans_log_inode(tp, orphanage_ip, XFS_ILOG_CORE);
1166
1167                         err = -libxfs_dir_createname(tp, ino_p, &xfs_name_dotdot,
1168                                         orphanage_ino, &first, &dfops, nres);
1169                         if (err)
1170                                 do_error(
1171         _("creation of .. entry failed (%d), filesystem may be out of space\n"),
1172                                         err);
1173
1174                         inc_nlink(VFS_I(ino_p));
1175                         libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE);
1176
1177                         libxfs_defer_ijoin(&dfops, ino_p);
1178                         err = -libxfs_defer_finish(&tp, &dfops);
1179                         if (err)
1180                                 do_error(
1181         _("bmap finish failed (err - %d), filesystem may be out of space\n"),
1182                                         err);
1183
1184                         libxfs_trans_commit(tp);
1185                 } else  {
1186                         err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_rename,
1187                                                   nres, 0, 0, &tp);
1188                         if (err)
1189                                 do_error(
1190         _("space reservation failed (%d), filesystem may be out of space\n"),
1191                                         err);
1192
1193                         libxfs_trans_ijoin(tp, orphanage_ip, 0);
1194                         libxfs_trans_ijoin(tp, ino_p, 0);
1195
1196                         libxfs_defer_init(&dfops, &first);
1197
1198                         err = -libxfs_dir_createname(tp, orphanage_ip, &xname,
1199                                                 ino, &first, &dfops, nres);
1200                         if (err)
1201                                 do_error(
1202         _("name create failed in %s (%d), filesystem may be out of space\n"),
1203                                         ORPHANAGE, err);
1204
1205                         if (irec)
1206                                 add_inode_ref(irec, ino_offset);
1207                         else
1208                                 inc_nlink(VFS_I(orphanage_ip));
1209                         libxfs_trans_log_inode(tp, orphanage_ip, XFS_ILOG_CORE);
1210
1211                         /*
1212                          * don't replace .. value if it already points
1213                          * to us.  that'll pop a libxfs/kernel ASSERT.
1214                          */
1215                         if (entry_ino_num != orphanage_ino)  {
1216                                 err = -libxfs_dir_replace(tp, ino_p,
1217                                                 &xfs_name_dotdot, orphanage_ino,
1218                                                 &first, &dfops, nres);
1219                                 if (err)
1220                                         do_error(
1221         _("name replace op failed (%d), filesystem may be out of space\n"),
1222                                                 err);
1223                         }
1224
1225                         libxfs_defer_ijoin(&dfops, ino_p);
1226                         err = -libxfs_defer_finish(&tp, &dfops);
1227                         if (err)
1228                                 do_error(
1229         _("bmap finish failed (%d), filesystem may be out of space\n"),
1230                                         err);
1231
1232                         libxfs_trans_commit(tp);
1233                 }
1234
1235         } else  {
1236                 /*
1237                  * use the remove log reservation as that's
1238                  * more accurate.  we're only creating the
1239                  * links, we're not doing the inode allocation
1240                  * also accounted for in the create
1241                  */
1242                 nres = XFS_DIRENTER_SPACE_RES(mp, xname.len);
1243                 err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove,
1244                                           nres, 0, 0, &tp);
1245                 if (err)
1246                         do_error(
1247         _("space reservation failed (%d), filesystem may be out of space\n"),
1248                                 err);
1249
1250                 libxfs_trans_ijoin(tp, orphanage_ip, 0);
1251                 libxfs_trans_ijoin(tp, ino_p, 0);
1252
1253                 libxfs_defer_init(&dfops, &first);
1254                 err = -libxfs_dir_createname(tp, orphanage_ip, &xname, ino,
1255                                                 &first, &dfops, nres);
1256                 if (err)
1257                         do_error(
1258         _("name create failed in %s (%d), filesystem may be out of space\n"),
1259                                 ORPHANAGE, err);
1260                 ASSERT(err == 0);
1261
1262                 set_nlink(VFS_I(ino_p), 1);
1263                 libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE);
1264
1265                 libxfs_defer_ijoin(&dfops, ino_p);
1266                 err = -libxfs_defer_finish(&tp, &dfops);
1267                 if (err)
1268                         do_error(
1269         _("bmap finish failed (%d), filesystem may be out of space\n"),
1270                                 err);
1271
1272                 libxfs_trans_commit(tp);
1273         }
1274         IRELE(ino_p);
1275         IRELE(orphanage_ip);
1276 }
1277
1278 static int
1279 entry_junked(
1280         const char      *msg,
1281         const char      *iname,
1282         xfs_ino_t       ino1,
1283         xfs_ino_t       ino2)
1284 {
1285         do_warn(msg, iname, ino1, ino2);
1286         if (!no_modify) {
1287                 if (verbose)
1288                         do_warn(_(", marking entry to be junked\n"));
1289                 else
1290                         do_warn("\n");
1291         } else
1292                 do_warn(_(", would junk entry\n"));
1293         return !no_modify;
1294 }
1295
1296 /* Find and invalidate all the directory's buffers. */
1297 static int
1298 dir_binval(
1299         struct xfs_trans        *tp,
1300         struct xfs_inode        *ip,
1301         int                     whichfork)
1302 {
1303         struct xfs_iext_cursor  icur;
1304         struct xfs_bmbt_irec    rec;
1305         struct xfs_ifork        *ifp;
1306         struct xfs_da_geometry  *geo;
1307         struct xfs_buf          *bp;
1308         xfs_dablk_t             dabno, end_dabno;
1309         int                     error = 0;
1310
1311         if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
1312             ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
1313                 return 0;
1314
1315         geo = tp->t_mountp->m_dir_geo;
1316         ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1317         for_each_xfs_iext(ifp, &icur, &rec) {
1318                 dabno = xfs_dir2_db_to_da(geo, rec.br_startoff +
1319                                 geo->fsbcount - 1);
1320                 end_dabno = xfs_dir2_db_to_da(geo, rec.br_startoff +
1321                                 rec.br_blockcount);
1322                 for (; dabno <= end_dabno; dabno += geo->fsbcount) {
1323                         bp = NULL;
1324                         error = -libxfs_da_get_buf(tp, ip, dabno, -2, &bp,
1325                                         whichfork);
1326                         if (error)
1327                                 return error;
1328                         if (!bp)
1329                                 continue;
1330                         libxfs_trans_binval(tp, bp);
1331                         libxfs_trans_brelse(tp, bp);
1332                 }
1333         }
1334
1335         return error;
1336 }
1337
1338 /*
1339  * Unexpected failure during the rebuild will leave the entries in
1340  * lost+found on the next run
1341  */
1342
1343 static void
1344 longform_dir2_rebuild(
1345         xfs_mount_t             *mp,
1346         xfs_ino_t               ino,
1347         xfs_inode_t             *ip,
1348         ino_tree_node_t         *irec,
1349         int                     ino_offset,
1350         dir_hash_tab_t          *hashtab)
1351 {
1352         int                     error;
1353         int                     nres;
1354         xfs_trans_t             *tp;
1355         xfs_fileoff_t           lastblock;
1356         xfs_fsblock_t           firstblock;
1357         struct xfs_defer_ops            dfops;
1358         xfs_inode_t             pip;
1359         dir_hash_ent_t          *p;
1360         int                     done;
1361
1362         /*
1363          * trash directory completely and rebuild from scratch using the
1364          * name/inode pairs in the hash table
1365          */
1366
1367         do_warn(_("rebuilding directory inode %" PRIu64 "\n"), ino);
1368
1369         /*
1370          * first attempt to locate the parent inode, if it can't be
1371          * found, set it to the root inode and it'll be moved to the
1372          * orphanage later (the inode number here needs to be valid
1373          * for the libxfs_dir_init() call).
1374          */
1375         pip.i_ino = get_inode_parent(irec, ino_offset);
1376         if (pip.i_ino == NULLFSINO ||
1377             libxfs_dir_ino_validate(mp, pip.i_ino))
1378                 pip.i_ino = mp->m_sb.sb_rootino;
1379
1380         libxfs_defer_init(&dfops, &firstblock);
1381
1382         nres = XFS_REMOVE_SPACE_RES(mp);
1383         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, nres, 0, 0, &tp);
1384         if (error)
1385                 res_failed(error);
1386         libxfs_trans_ijoin(tp, ip, 0);
1387
1388         error = dir_binval(tp, ip, XFS_DATA_FORK);
1389         if (error)
1390                 res_failed(error);
1391
1392         if ((error = -libxfs_bmap_last_offset(ip, &lastblock, XFS_DATA_FORK)))
1393                 do_error(_("xfs_bmap_last_offset failed -- error - %d\n"),
1394                         error);
1395
1396         /* free all data, leaf, node and freespace blocks */
1397         error = -libxfs_bunmapi(tp, ip, 0, lastblock, XFS_BMAPI_METADATA, 0,
1398                                 &firstblock, &dfops, &done);
1399         if (error) {
1400                 do_warn(_("xfs_bunmapi failed -- error - %d\n"), error);
1401                 goto out_bmap_cancel;
1402         }
1403
1404         ASSERT(done);
1405
1406         error = -libxfs_dir_init(tp, ip, &pip);
1407         if (error) {
1408                 do_warn(_("xfs_dir_init failed -- error - %d\n"), error);
1409                 goto out_bmap_cancel;
1410         }
1411
1412         libxfs_defer_ijoin(&dfops, ip);
1413         error = -libxfs_defer_finish(&tp, &dfops);
1414
1415         libxfs_trans_commit(tp);
1416
1417         if (ino == mp->m_sb.sb_rootino)
1418                 need_root_dotdot = 0;
1419
1420         /* go through the hash list and re-add the inodes */
1421
1422         for (p = hashtab->first; p; p = p->nextbyorder) {
1423
1424                 if (p->name.name[0] == '/' || (p->name.name[0] == '.' &&
1425                                 (p->name.len == 1 || (p->name.len == 2 &&
1426                                                 p->name.name[1] == '.'))))
1427                         continue;
1428
1429                 nres = XFS_CREATE_SPACE_RES(mp, p->name.len);
1430                 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_create,
1431                                             nres, 0, 0, &tp);
1432                 if (error)
1433                         res_failed(error);
1434
1435                 libxfs_trans_ijoin(tp, ip, 0);
1436
1437                 libxfs_defer_init(&dfops, &firstblock);
1438                 error = -libxfs_dir_createname(tp, ip, &p->name, p->inum,
1439                                                 &firstblock, &dfops, nres);
1440                 if (error) {
1441                         do_warn(
1442 _("name create failed in ino %" PRIu64 " (%d), filesystem may be out of space\n"),
1443                                 ino, error);
1444                         goto out_bmap_cancel;
1445                 }
1446
1447                 libxfs_defer_ijoin(&dfops, ip);
1448                 error = -libxfs_defer_finish(&tp, &dfops);
1449                 if (error) {
1450                         do_warn(
1451         _("bmap finish failed (%d), filesystem may be out of space\n"),
1452                                 error);
1453                         goto out_bmap_cancel;
1454                 }
1455
1456                 libxfs_trans_commit(tp);
1457         }
1458
1459         return;
1460
1461 out_bmap_cancel:
1462         libxfs_defer_cancel(&dfops);
1463         libxfs_trans_cancel(tp);
1464         return;
1465 }
1466
1467
1468 /*
1469  * Kill a block in a version 2 inode.
1470  * Makes its own transaction.
1471  */
1472 static void
1473 dir2_kill_block(
1474         xfs_mount_t     *mp,
1475         xfs_inode_t     *ip,
1476         xfs_dablk_t     da_bno,
1477         struct xfs_buf  *bp)
1478 {
1479         xfs_da_args_t   args;
1480         int             error;
1481         xfs_fsblock_t   firstblock;
1482         struct xfs_defer_ops    dfops;
1483         int             nres;
1484         xfs_trans_t     *tp;
1485
1486         nres = XFS_REMOVE_SPACE_RES(mp);
1487         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, nres, 0, 0, &tp);
1488         if (error)
1489                 res_failed(error);
1490         libxfs_trans_ijoin(tp, ip, 0);
1491         libxfs_trans_bjoin(tp, bp);
1492         memset(&args, 0, sizeof(args));
1493         libxfs_defer_init(&dfops, &firstblock);
1494         args.dp = ip;
1495         args.trans = tp;
1496         args.firstblock = &firstblock;
1497         args.dfops = &dfops;
1498         args.whichfork = XFS_DATA_FORK;
1499         args.geo = mp->m_dir_geo;
1500         if (da_bno >= mp->m_dir_geo->leafblk && da_bno < mp->m_dir_geo->freeblk)
1501                 error = -libxfs_da_shrink_inode(&args, da_bno, bp);
1502         else
1503                 error = -libxfs_dir2_shrink_inode(&args,
1504                                 xfs_dir2_da_to_db(mp->m_dir_geo, da_bno), bp);
1505         if (error)
1506                 do_error(_("shrink_inode failed inode %" PRIu64 " block %u\n"),
1507                         ip->i_ino, da_bno);
1508         libxfs_defer_ijoin(&dfops, ip);
1509         libxfs_defer_finish(&tp, &dfops);
1510         libxfs_trans_commit(tp);
1511 }
1512
1513 /*
1514  * process a data block, also checks for .. entry
1515  * and corrects it to match what we think .. should be
1516  */
1517 static void
1518 longform_dir2_entry_check_data(
1519         xfs_mount_t             *mp,
1520         xfs_inode_t             *ip,
1521         int                     *num_illegal,
1522         int                     *need_dot,
1523         ino_tree_node_t         *current_irec,
1524         int                     current_ino_offset,
1525         struct xfs_buf          **bpp,
1526         dir_hash_tab_t          *hashtab,
1527         freetab_t               **freetabp,
1528         xfs_dablk_t             da_bno,
1529         int                     isblock)
1530 {
1531         xfs_dir2_dataptr_t      addr;
1532         xfs_dir2_leaf_entry_t   *blp;
1533         struct xfs_buf          *bp;
1534         xfs_dir2_block_tail_t   *btp;
1535         struct xfs_dir2_data_hdr *d;
1536         xfs_dir2_db_t           db;
1537         xfs_dir2_data_entry_t   *dep;
1538         xfs_dir2_data_unused_t  *dup;
1539         struct xfs_dir2_data_free *bf;
1540         char                    *endptr;
1541         int                     error;
1542         xfs_fsblock_t           firstblock;
1543         struct xfs_defer_ops            dfops;
1544         char                    fname[MAXNAMELEN + 1];
1545         freetab_t               *freetab;
1546         int                     i;
1547         int                     ino_offset;
1548         xfs_ino_t               inum;
1549         ino_tree_node_t         *irec;
1550         int                     junkit;
1551         int                     lastfree;
1552         int                     len;
1553         int                     nbad;
1554         int                     needlog;
1555         int                     needscan;
1556         xfs_ino_t               parent;
1557         char                    *ptr;
1558         xfs_trans_t             *tp;
1559         int                     wantmagic;
1560         struct xfs_da_args      da = {
1561                 .dp = ip,
1562                 .geo = mp->m_dir_geo,
1563         };
1564
1565
1566         bp = *bpp;
1567         d = bp->b_addr;
1568         ptr = (char *)M_DIROPS(mp)->data_entry_p(d);
1569         nbad = 0;
1570         needscan = needlog = 0;
1571         junkit = 0;
1572         freetab = *freetabp;
1573         if (isblock) {
1574                 btp = xfs_dir2_block_tail_p(mp->m_dir_geo, d);
1575                 blp = xfs_dir2_block_leaf_p(btp);
1576                 endptr = (char *)blp;
1577                 if (endptr > (char *)btp)
1578                         endptr = (char *)btp;
1579                 if (xfs_sb_version_hascrc(&mp->m_sb))
1580                         wantmagic = XFS_DIR3_BLOCK_MAGIC;
1581                 else
1582                         wantmagic = XFS_DIR2_BLOCK_MAGIC;
1583         } else {
1584                 endptr = (char *)d + mp->m_dir_geo->blksize;
1585                 if (xfs_sb_version_hascrc(&mp->m_sb))
1586                         wantmagic = XFS_DIR3_DATA_MAGIC;
1587                 else
1588                         wantmagic = XFS_DIR2_DATA_MAGIC;
1589         }
1590         db = xfs_dir2_da_to_db(mp->m_dir_geo, da_bno);
1591
1592         /* check for data block beyond expected end */
1593         if (freetab->naents <= db) {
1594                 struct freetab_ent e;
1595
1596                 *freetabp = freetab = realloc(freetab, FREETAB_SIZE(db + 1));
1597                 if (!freetab) {
1598                         do_error(_("realloc failed in %s (%zu bytes)\n"),
1599                                 __func__, FREETAB_SIZE(db + 1));
1600                 }
1601                 e.v = NULLDATAOFF;
1602                 e.s = 0;
1603                 for (i = freetab->naents; i < db; i++)
1604                         freetab->ents[i] = e;
1605                 freetab->naents = db + 1;
1606         }
1607
1608         /* check the data block */
1609         while (ptr < endptr) {
1610
1611                 /* check for freespace */
1612                 dup = (xfs_dir2_data_unused_t *)ptr;
1613                 if (XFS_DIR2_DATA_FREE_TAG == be16_to_cpu(dup->freetag)) {
1614
1615                         /* check for invalid freespace length */
1616                         if (ptr + be16_to_cpu(dup->length) > endptr ||
1617                                         be16_to_cpu(dup->length) == 0 ||
1618                                         (be16_to_cpu(dup->length) &
1619                                                 (XFS_DIR2_DATA_ALIGN - 1)))
1620                                 break;
1621
1622                         /* check for invalid tag */
1623                         if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) !=
1624                                                 (char *)dup - (char *)d)
1625                                 break;
1626
1627                         /* check for block with no data entries */
1628                         if ((ptr == (char *)M_DIROPS(mp)->data_entry_p(d)) &&
1629                             (ptr + be16_to_cpu(dup->length) >= endptr)) {
1630                                 junkit = 1;
1631                                 *num_illegal += 1;
1632                                 break;
1633                         }
1634
1635                         /* continue at the end of the freespace */
1636                         ptr += be16_to_cpu(dup->length);
1637                         if (ptr >= endptr)
1638                                 break;
1639                 }
1640
1641                 /* validate data entry size */
1642                 dep = (xfs_dir2_data_entry_t *)ptr;
1643                 if (ptr + M_DIROPS(mp)->data_entsize(dep->namelen) > endptr)
1644                         break;
1645                 if (be16_to_cpu(*M_DIROPS(mp)->data_entry_tag_p(dep)) !=
1646                                                 (char *)dep - (char *)d)
1647                         break;
1648                 ptr += M_DIROPS(mp)->data_entsize(dep->namelen);
1649         }
1650
1651         /* did we find an empty or corrupt block? */
1652         if (ptr != endptr) {
1653                 if (junkit) {
1654                         do_warn(
1655         _("empty data block %u in directory inode %" PRIu64 ": "),
1656                                 da_bno, ip->i_ino);
1657                 } else {
1658                         do_warn(_
1659         ("corrupt block %u in directory inode %" PRIu64 ": "),
1660                                 da_bno, ip->i_ino);
1661                 }
1662                 if (!no_modify) {
1663                         do_warn(_("junking block\n"));
1664                         dir2_kill_block(mp, ip, da_bno, bp);
1665                 } else {
1666                         do_warn(_("would junk block\n"));
1667                         libxfs_putbuf(bp);
1668                 }
1669                 freetab->ents[db].v = NULLDATAOFF;
1670                 *bpp = NULL;
1671                 return;
1672         }
1673
1674         /* update number of data blocks processed */
1675         if (freetab->nents < db + 1)
1676                 freetab->nents = db + 1;
1677
1678         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, 0, 0, 0, &tp);
1679         if (error)
1680                 res_failed(error);
1681         da.trans = tp;
1682         libxfs_trans_ijoin(tp, ip, 0);
1683         libxfs_trans_bjoin(tp, bp);
1684         libxfs_trans_bhold(tp, bp);
1685         libxfs_defer_init(&dfops, &firstblock);
1686         if (be32_to_cpu(d->magic) != wantmagic) {
1687                 do_warn(
1688         _("bad directory block magic # %#x for directory inode %" PRIu64 " block %d: "),
1689                         be32_to_cpu(d->magic), ip->i_ino, da_bno);
1690                 if (!no_modify) {
1691                         do_warn(_("fixing magic # to %#x\n"), wantmagic);
1692                         d->magic = cpu_to_be32(wantmagic);
1693                         needlog = 1;
1694                 } else
1695                         do_warn(_("would fix magic # to %#x\n"), wantmagic);
1696         }
1697         lastfree = 0;
1698         ptr = (char *)M_DIROPS(mp)->data_entry_p(d);
1699         /*
1700          * look at each entry.  reference inode pointed to by each
1701          * entry in the incore inode tree.
1702          * if not a directory, set reached flag, increment link count
1703          * if a directory and reached, mark entry as to be deleted.
1704          * if a directory, check to see if recorded parent
1705          *      matches current inode #,
1706          *      if so, then set reached flag, increment link count
1707          *              of current and child dir inodes, push the child
1708          *              directory inode onto the directory stack.
1709          *      if current inode != parent, then mark entry to be deleted.
1710          */
1711         while (ptr < endptr) {
1712                 dup = (xfs_dir2_data_unused_t *)ptr;
1713                 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
1714                         if (lastfree) {
1715                                 do_warn(
1716         _("directory inode %" PRIu64 " block %u has consecutive free entries: "),
1717                                         ip->i_ino, da_bno);
1718                                 if (!no_modify) {
1719
1720                                         do_warn(_("joining together\n"));
1721                                         len = be16_to_cpu(dup->length);
1722                                         libxfs_dir2_data_use_free(&da, bp, dup,
1723                                                 ptr - (char *)d, len, &needlog,
1724                                                 &needscan);
1725                                         libxfs_dir2_data_make_free(&da, bp,
1726                                                 ptr - (char *)d, len, &needlog,
1727                                                 &needscan);
1728                                 } else
1729                                         do_warn(_("would join together\n"));
1730                         }
1731                         ptr += be16_to_cpu(dup->length);
1732                         lastfree = 1;
1733                         continue;
1734                 }
1735                 addr = xfs_dir2_db_off_to_dataptr(mp->m_dir_geo, db,
1736                                                   ptr - (char *)d);
1737                 dep = (xfs_dir2_data_entry_t *)ptr;
1738                 ptr += M_DIROPS(mp)->data_entsize(dep->namelen);
1739                 inum = be64_to_cpu(dep->inumber);
1740                 lastfree = 0;
1741                 /*
1742                  * skip bogus entries (leading '/').  they'll be deleted
1743                  * later.  must still log it, else we leak references to
1744                  * buffers.
1745                  */
1746                 if (dep->name[0] == '/')  {
1747                         nbad++;
1748                         if (!no_modify)
1749                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1750                         continue;
1751                 }
1752
1753                 memmove(fname, dep->name, dep->namelen);
1754                 fname[dep->namelen] = '\0';
1755                 ASSERT(inum != NULLFSINO);
1756
1757                 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, inum),
1758                                         XFS_INO_TO_AGINO(mp, inum));
1759                 if (irec == NULL)  {
1760                         nbad++;
1761                         if (entry_junked(
1762         _("entry \"%s\" in directory inode %" PRIu64 " points to non-existent inode %" PRIu64 ""),
1763                                         fname, ip->i_ino, inum)) {
1764                                 dep->name[0] = '/';
1765                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1766                         }
1767                         continue;
1768                 }
1769                 ino_offset = XFS_INO_TO_AGINO(mp, inum) - irec->ino_startnum;
1770
1771                 /*
1772                  * if it's a free inode, blow out the entry.
1773                  * by now, any inode that we think is free
1774                  * really is free.
1775                  */
1776                 if (is_inode_free(irec, ino_offset))  {
1777                         nbad++;
1778                         if (entry_junked(
1779         _("entry \"%s\" in directory inode %" PRIu64 " points to free inode %" PRIu64),
1780                                         fname, ip->i_ino, inum)) {
1781                                 dep->name[0] = '/';
1782                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1783                         }
1784                         continue;
1785                 }
1786
1787                 /*
1788                  * check if this inode is lost+found dir in the root
1789                  */
1790                 if (inum == mp->m_sb.sb_rootino && strcmp(fname, ORPHANAGE) == 0) {
1791                         /*
1792                          * if it's not a directory, trash it
1793                          */
1794                         if (!inode_isadir(irec, ino_offset)) {
1795                                 nbad++;
1796                                 if (entry_junked(
1797         _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"),
1798                                                 ORPHANAGE, inum, ip->i_ino)) {
1799                                         dep->name[0] = '/';
1800                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1801                                 }
1802                                 continue;
1803                         }
1804                         /*
1805                          * if this is a dup, it will be picked up below,
1806                          * otherwise, mark it as the orphanage for later.
1807                          */
1808                         if (!orphanage_ino)
1809                                 orphanage_ino = inum;
1810                 }
1811
1812                 /*
1813                  * check for duplicate names in directory.
1814                  */
1815                 if (!dir_hash_add(mp, hashtab, addr, inum, dep->namelen,
1816                                 dep->name, M_DIROPS(mp)->data_get_ftype(dep))) {
1817                         nbad++;
1818                         if (entry_junked(
1819         _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"),
1820                                         fname, inum, ip->i_ino)) {
1821                                 dep->name[0] = '/';
1822                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1823                         }
1824                         if (inum == orphanage_ino)
1825                                 orphanage_ino = 0;
1826                         continue;
1827                 }
1828
1829                 /*
1830                  * if just scanning to rebuild a directory due to a ".."
1831                  * update, just continue
1832                  */
1833                 if (dotdot_update)
1834                         continue;
1835
1836                 /*
1837                  * skip the '..' entry since it's checked when the
1838                  * directory is reached by something else.  if it never
1839                  * gets reached, it'll be moved to the orphanage and we'll
1840                  * take care of it then. If it doesn't exist at all, the
1841                  * directory needs to be rebuilt first before being added
1842                  * to the orphanage.
1843                  */
1844                 if (dep->namelen == 2 && dep->name[0] == '.' &&
1845                                 dep->name[1] == '.') {
1846                         if (da_bno != 0) {
1847                                 /* ".." should be in the first block */
1848                                 nbad++;
1849                                 if (entry_junked(
1850         _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is not in the the first block"), fname,
1851                                                 inum, ip->i_ino)) {
1852                                         dep->name[0] = '/';
1853                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1854                                 }
1855                         }
1856                         continue;
1857                 }
1858                 ASSERT(no_modify || !verify_inum(mp, inum));
1859                 /*
1860                  * special case the . entry.  we know there's only one
1861                  * '.' and only '.' points to itself because bogus entries
1862                  * got trashed in phase 3 if there were > 1.
1863                  * bump up link count for '.' but don't set reached
1864                  * until we're actually reached by another directory
1865                  * '..' is already accounted for or will be taken care
1866                  * of when directory is moved to orphanage.
1867                  */
1868                 if (ip->i_ino == inum)  {
1869                         ASSERT(no_modify ||
1870                                (dep->name[0] == '.' && dep->namelen == 1));
1871                         add_inode_ref(current_irec, current_ino_offset);
1872                         if (da_bno != 0 ||
1873                             dep != M_DIROPS(mp)->data_entry_p(d)) {
1874                                 /* "." should be the first entry */
1875                                 nbad++;
1876                                 if (entry_junked(
1877         _("entry \"%s\" in dir %" PRIu64 " is not the first entry"),
1878                                                 fname, inum, ip->i_ino)) {
1879                                         dep->name[0] = '/';
1880                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1881                                 }
1882                         }
1883                         *need_dot = 0;
1884                         continue;
1885                 }
1886                 /*
1887                  * skip entries with bogus inumbers if we're in no modify mode
1888                  */
1889                 if (no_modify && verify_inum(mp, inum))
1890                         continue;
1891
1892                 /* validate ftype field if supported */
1893                 if (xfs_sb_version_hasftype(&mp->m_sb)) {
1894                         uint8_t dir_ftype;
1895                         uint8_t ino_ftype;
1896
1897                         dir_ftype = M_DIROPS(mp)->data_get_ftype(dep);
1898                         ino_ftype = get_inode_ftype(irec, ino_offset);
1899
1900                         if (dir_ftype != ino_ftype) {
1901                                 if (no_modify) {
1902                                         do_warn(
1903         _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
1904                                                 dir_ftype, ino_ftype,
1905                                                 ip->i_ino, inum);
1906                                 } else {
1907                                         do_warn(
1908         _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
1909                                                 dir_ftype, ino_ftype,
1910                                                 ip->i_ino, inum);
1911                                         M_DIROPS(mp)->data_put_ftype(dep,
1912                                                                 ino_ftype);
1913                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1914                                         dir_hash_update_ftype(hashtab, addr,
1915                                                               ino_ftype);
1916                                 }
1917                         }
1918                 }
1919
1920                 /*
1921                  * check easy case first, regular inode, just bump
1922                  * the link count and continue
1923                  */
1924                 if (!inode_isadir(irec, ino_offset))  {
1925                         add_inode_reached(irec, ino_offset);
1926                         continue;
1927                 }
1928                 parent = get_inode_parent(irec, ino_offset);
1929                 ASSERT(parent != 0);
1930                 junkit = 0;
1931                 /*
1932                  * bump up the link counts in parent and child
1933                  * directory but if the link doesn't agree with
1934                  * the .. in the child, blow out the entry.
1935                  * if the directory has already been reached,
1936                  * blow away the entry also.
1937                  */
1938                 if (is_inode_reached(irec, ino_offset))  {
1939                         junkit = 1;
1940                         do_warn(
1941 _("entry \"%s\" in dir %" PRIu64" points to an already connected directory inode %" PRIu64 "\n"),
1942                                 fname, ip->i_ino, inum);
1943                 } else if (parent == ip->i_ino)  {
1944                         add_inode_reached(irec, ino_offset);
1945                         add_inode_ref(current_irec, current_ino_offset);
1946                 } else if (parent == NULLFSINO) {
1947                         /* ".." was missing, but this entry refers to it,
1948                            so, set it as the parent and mark for rebuild */
1949                         do_warn(
1950         _("entry \"%s\" in dir ino %" PRIu64 " doesn't have a .. entry, will set it in ino %" PRIu64 ".\n"),
1951                                 fname, ip->i_ino, inum);
1952                         set_inode_parent(irec, ino_offset, ip->i_ino);
1953                         add_inode_reached(irec, ino_offset);
1954                         add_inode_ref(current_irec, current_ino_offset);
1955                         add_dotdot_update(XFS_INO_TO_AGNO(mp, inum), irec,
1956                                                                 ino_offset);
1957                 } else  {
1958                         junkit = 1;
1959                         do_warn(
1960 _("entry \"%s\" in dir inode %" PRIu64 " inconsistent with .. value (%" PRIu64 ") in ino %" PRIu64 "\n"),
1961                                 fname, ip->i_ino, parent, inum);
1962                 }
1963                 if (junkit)  {
1964                         if (inum == orphanage_ino)
1965                                 orphanage_ino = 0;
1966                         nbad++;
1967                         if (!no_modify)  {
1968                                 dep->name[0] = '/';
1969                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1970                                 if (verbose)
1971                                         do_warn(
1972                                         _("\twill clear entry \"%s\"\n"),
1973                                                 fname);
1974                         } else  {
1975                                 do_warn(_("\twould clear entry \"%s\"\n"),
1976                                         fname);
1977                         }
1978                 }
1979         }
1980         *num_illegal += nbad;
1981         if (needscan)
1982                 libxfs_dir2_data_freescan_int(mp->m_dir_geo, M_DIROPS(mp),
1983                                 d, &i);
1984         if (needlog)
1985                 libxfs_dir2_data_log_header(&da, bp);
1986         libxfs_defer_ijoin(&dfops, ip);
1987         libxfs_defer_finish(&tp, &dfops);
1988         libxfs_trans_commit(tp);
1989
1990         /* record the largest free space in the freetab for later checking */
1991         bf = M_DIROPS(mp)->data_bestfree_p(d);
1992         freetab->ents[db].v = be16_to_cpu(bf[0].length);
1993         freetab->ents[db].s = 0;
1994 }
1995
1996 /* check v5 metadata */
1997 static int
1998 __check_dir3_header(
1999         struct xfs_mount        *mp,
2000         struct xfs_buf          *bp,
2001         xfs_ino_t               ino,
2002         __be64                  owner,
2003         __be64                  blkno,
2004         uuid_t                  *uuid)
2005 {
2006
2007         /* verify owner */
2008         if (be64_to_cpu(owner) != ino) {
2009                 do_warn(
2010 _("expected owner inode %" PRIu64 ", got %llu, directory block %" PRIu64 "\n"),
2011                         ino, (unsigned long long)be64_to_cpu(owner), bp->b_bn);
2012                 return 1;
2013         }
2014         /* verify block number */
2015         if (be64_to_cpu(blkno) != bp->b_bn) {
2016                 do_warn(
2017 _("expected block %" PRIu64 ", got %llu, directory inode %" PRIu64 "\n"),
2018                         bp->b_bn, (unsigned long long)be64_to_cpu(blkno), ino);
2019                 return 1;
2020         }
2021         /* verify uuid */
2022         if (platform_uuid_compare(uuid, &mp->m_sb.sb_meta_uuid) != 0) {
2023                 do_warn(
2024 _("wrong FS UUID, directory inode %" PRIu64 " block %" PRIu64 "\n"),
2025                         ino, bp->b_bn);
2026                 return 1;
2027         }
2028
2029         return 0;
2030 }
2031
2032 static int
2033 check_da3_header(
2034         struct xfs_mount        *mp,
2035         struct xfs_buf          *bp,
2036         xfs_ino_t               ino)
2037 {
2038         struct xfs_da3_blkinfo  *info = bp->b_addr;
2039
2040         return __check_dir3_header(mp, bp, ino, info->owner, info->blkno,
2041                                    &info->uuid);
2042 }
2043
2044 static int
2045 check_dir3_header(
2046         struct xfs_mount        *mp,
2047         struct xfs_buf          *bp,
2048         xfs_ino_t               ino)
2049 {
2050         struct xfs_dir3_blk_hdr *info = bp->b_addr;
2051
2052         return __check_dir3_header(mp, bp, ino, info->owner, info->blkno,
2053                                    &info->uuid);
2054 }
2055
2056 /*
2057  * Check contents of leaf-form block.
2058  */
2059 static int
2060 longform_dir2_check_leaf(
2061         xfs_mount_t             *mp,
2062         xfs_inode_t             *ip,
2063         dir_hash_tab_t          *hashtab,
2064         freetab_t               *freetab)
2065 {
2066         int                     badtail;
2067         __be16                  *bestsp;
2068         struct xfs_buf          *bp;
2069         xfs_dablk_t             da_bno;
2070         int                     i;
2071         xfs_dir2_leaf_t         *leaf;
2072         xfs_dir2_leaf_tail_t    *ltp;
2073         int                     seeval;
2074         struct xfs_dir2_leaf_entry *ents;
2075         struct xfs_dir3_icleaf_hdr leafhdr;
2076         int                     error;
2077         int                     fixit = 0;
2078
2079         da_bno = mp->m_dir_geo->leafblk;
2080         error = dir_read_buf(ip, da_bno, -1, &bp, &xfs_dir3_leaf1_buf_ops,
2081                              &fixit);
2082         if (error == EFSBADCRC || error == EFSCORRUPTED || fixit) {
2083                 do_warn(
2084         _("leaf block %u for directory inode %" PRIu64 " bad CRC\n"),
2085                         da_bno, ip->i_ino);
2086                 return 1;
2087         } else if (error) {
2088                 do_error(
2089         _("can't read block %u for directory inode %" PRIu64 ", error %d\n"),
2090                         da_bno, ip->i_ino, error);
2091                 /* NOTREACHED */
2092         }
2093
2094         leaf = bp->b_addr;
2095         M_DIROPS(mp)->leaf_hdr_from_disk(&leafhdr, leaf);
2096         ents = M_DIROPS(mp)->leaf_ents_p(leaf);
2097         ltp = xfs_dir2_leaf_tail_p(mp->m_dir_geo, leaf);
2098         bestsp = xfs_dir2_leaf_bests_p(ltp);
2099         if (!(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC ||
2100               leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) ||
2101                                 leafhdr.forw || leafhdr.back ||
2102                                 leafhdr.count < leafhdr.stale ||
2103                                 leafhdr.count >
2104                                         M_DIROPS(mp)->leaf_max_ents(mp->m_dir_geo) ||
2105                                 (char *)&ents[leafhdr.count] > (char *)bestsp) {
2106                 do_warn(
2107         _("leaf block %u for directory inode %" PRIu64 " bad header\n"),
2108                         da_bno, ip->i_ino);
2109                 libxfs_putbuf(bp);
2110                 return 1;
2111         }
2112
2113         if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) {
2114                 error = check_da3_header(mp, bp, ip->i_ino);
2115                 if (error) {
2116                         libxfs_putbuf(bp);
2117                         return error;
2118                 }
2119         }
2120
2121         seeval = dir_hash_see_all(hashtab, ents, leafhdr.count, leafhdr.stale);
2122         if (dir_hash_check(hashtab, ip, seeval)) {
2123                 libxfs_putbuf(bp);
2124                 return 1;
2125         }
2126         badtail = freetab->nents != be32_to_cpu(ltp->bestcount);
2127         for (i = 0; !badtail && i < be32_to_cpu(ltp->bestcount); i++) {
2128                 freetab->ents[i].s = 1;
2129                 badtail = freetab->ents[i].v != be16_to_cpu(bestsp[i]);
2130         }
2131         if (badtail) {
2132                 do_warn(
2133         _("leaf block %u for directory inode %" PRIu64 " bad tail\n"),
2134                         da_bno, ip->i_ino);
2135                 libxfs_putbuf(bp);
2136                 return 1;
2137         }
2138         libxfs_putbuf(bp);
2139         return fixit;
2140 }
2141
2142 /*
2143  * Check contents of the node blocks (leaves)
2144  * Looks for matching hash values for the data entries.
2145  */
2146 static int
2147 longform_dir2_check_node(
2148         xfs_mount_t             *mp,
2149         xfs_inode_t             *ip,
2150         dir_hash_tab_t          *hashtab,
2151         freetab_t               *freetab)
2152 {
2153         struct xfs_buf          *bp;
2154         xfs_dablk_t             da_bno;
2155         xfs_dir2_db_t           fdb;
2156         xfs_dir2_free_t         *free;
2157         int                     i;
2158         xfs_dir2_leaf_t         *leaf;
2159         xfs_fileoff_t           next_da_bno;
2160         int                     seeval = 0;
2161         int                     used;
2162         struct xfs_dir2_leaf_entry *ents;
2163         struct xfs_dir3_icleaf_hdr leafhdr;
2164         struct xfs_dir3_icfree_hdr freehdr;
2165         __be16                  *bests;
2166         int                     error;
2167         int                     fixit = 0;
2168
2169         for (da_bno = mp->m_dir_geo->leafblk, next_da_bno = 0;
2170                         next_da_bno != NULLFILEOFF && da_bno < mp->m_dir_geo->freeblk;
2171                         da_bno = (xfs_dablk_t)next_da_bno) {
2172                 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2173                 if (bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK))
2174                         break;
2175
2176                 /*
2177                  * we need to use the da3 node verifier here as it handles the
2178                  * fact that reading the leaf hash tree blocks can return either
2179                  * leaf or node blocks and calls the correct verifier. If we get
2180                  * a node block, then we'll skip it below based on a magic
2181                  * number check.
2182                  */
2183                 error = dir_read_buf(ip, da_bno, -1, &bp,
2184                                      &xfs_da3_node_buf_ops, &fixit);
2185                 if (error) {
2186                         do_warn(
2187         _("can't read leaf block %u for directory inode %" PRIu64 ", error %d\n"),
2188                                 da_bno, ip->i_ino, error);
2189                         return 1;
2190                 }
2191                 leaf = bp->b_addr;
2192                 M_DIROPS(mp)->leaf_hdr_from_disk(&leafhdr, leaf);
2193                 ents = M_DIROPS(mp)->leaf_ents_p(leaf);
2194                 if (!(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
2195                       leafhdr.magic == XFS_DIR3_LEAFN_MAGIC ||
2196                       leafhdr.magic == XFS_DA_NODE_MAGIC ||
2197                       leafhdr.magic == XFS_DA3_NODE_MAGIC)) {
2198                         do_warn(
2199         _("unknown magic number %#x for block %u in directory inode %" PRIu64 "\n"),
2200                                 leafhdr.magic, da_bno, ip->i_ino);
2201                         libxfs_putbuf(bp);
2202                         return 1;
2203                 }
2204
2205                 /* check v5 metadata */
2206                 if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC ||
2207                     leafhdr.magic == XFS_DA3_NODE_MAGIC) {
2208                         error = check_da3_header(mp, bp, ip->i_ino);
2209                         if (error) {
2210                                 libxfs_putbuf(bp);
2211                                 return error;
2212                         }
2213                 }
2214
2215                 /* ignore nodes */
2216                 if (leafhdr.magic == XFS_DA_NODE_MAGIC ||
2217                     leafhdr.magic == XFS_DA3_NODE_MAGIC) {
2218                         libxfs_putbuf(bp);
2219                         continue;
2220                 }
2221
2222                 /*
2223                  * If there's a validator error, we need to ensure that we got
2224                  * the right ops on the buffer for when we write it back out.
2225                  */
2226                 bp->b_ops = &xfs_dir3_leafn_buf_ops;
2227                 if (leafhdr.count > M_DIROPS(mp)->leaf_max_ents(mp->m_dir_geo) ||
2228                     leafhdr.count < leafhdr.stale) {
2229                         do_warn(
2230         _("leaf block %u for directory inode %" PRIu64 " bad header\n"),
2231                                 da_bno, ip->i_ino);
2232                         libxfs_putbuf(bp);
2233                         return 1;
2234                 }
2235                 seeval = dir_hash_see_all(hashtab, ents,
2236                                         leafhdr.count, leafhdr.stale);
2237                 libxfs_putbuf(bp);
2238                 if (seeval != DIR_HASH_CK_OK)
2239                         return 1;
2240         }
2241         if (dir_hash_check(hashtab, ip, seeval))
2242                 return 1;
2243
2244         for (da_bno = mp->m_dir_geo->freeblk, next_da_bno = 0;
2245              next_da_bno != NULLFILEOFF;
2246              da_bno = (xfs_dablk_t)next_da_bno) {
2247                 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2248                 if (bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK))
2249                         break;
2250
2251                 error = dir_read_buf(ip, da_bno, -1, &bp,
2252                                      &xfs_dir3_free_buf_ops, &fixit);
2253                 if (error) {
2254                         do_warn(
2255         _("can't read freespace block %u for directory inode %" PRIu64 ", error %d\n"),
2256                                 da_bno, ip->i_ino, error);
2257                         return 1;
2258                 }
2259                 free = bp->b_addr;
2260                 M_DIROPS(mp)->free_hdr_from_disk(&freehdr, free);
2261                 bests = M_DIROPS(mp)->free_bests_p(free);
2262                 fdb = xfs_dir2_da_to_db(mp->m_dir_geo, da_bno);
2263                 if (!(freehdr.magic == XFS_DIR2_FREE_MAGIC ||
2264                       freehdr.magic == XFS_DIR3_FREE_MAGIC) ||
2265                     freehdr.firstdb !=
2266                         (fdb - xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) *
2267                         M_DIROPS(mp)->free_max_bests(mp->m_dir_geo) ||
2268                     freehdr.nvalid < freehdr.nused) {
2269                         do_warn(
2270         _("free block %u for directory inode %" PRIu64 " bad header\n"),
2271                                 da_bno, ip->i_ino);
2272                         libxfs_putbuf(bp);
2273                         return 1;
2274                 }
2275
2276                 if (freehdr.magic == XFS_DIR3_FREE_MAGIC) {
2277                         error = check_dir3_header(mp, bp, ip->i_ino);
2278                         if (error) {
2279                                 libxfs_putbuf(bp);
2280                                 return error;
2281                         }
2282                 }
2283                 for (i = used = 0; i < freehdr.nvalid; i++) {
2284                         if (i + freehdr.firstdb >= freetab->nents ||
2285                                         freetab->ents[i + freehdr.firstdb].v !=
2286                                                 be16_to_cpu(bests[i])) {
2287                                 do_warn(
2288         _("free block %u entry %i for directory ino %" PRIu64 " bad\n"),
2289                                         da_bno, i, ip->i_ino);
2290                                 libxfs_putbuf(bp);
2291                                 return 1;
2292                         }
2293                         used += be16_to_cpu(bests[i]) != NULLDATAOFF;
2294                         freetab->ents[i + freehdr.firstdb].s = 1;
2295                 }
2296                 if (used != freehdr.nused) {
2297                         do_warn(
2298         _("free block %u for directory inode %" PRIu64 " bad nused\n"),
2299                                 da_bno, ip->i_ino);
2300                         libxfs_putbuf(bp);
2301                         return 1;
2302                 }
2303                 libxfs_putbuf(bp);
2304         }
2305         for (i = 0; i < freetab->nents; i++) {
2306                 if ((freetab->ents[i].s == 0) &&
2307                     (freetab->ents[i].v != NULLDATAOFF)) {
2308                         do_warn(
2309         _("missing freetab entry %u for directory inode %" PRIu64 "\n"),
2310                                 i, ip->i_ino);
2311                         return 1;
2312                 }
2313         }
2314         return fixit;
2315 }
2316
2317 /*
2318  * If a directory is corrupt, we need to read in as many entries as possible,
2319  * destroy the entry and create a new one with recovered name/inode pairs.
2320  * (ie. get libxfs to do all the grunt work)
2321  */
2322 static void
2323 longform_dir2_entry_check(xfs_mount_t   *mp,
2324                         xfs_ino_t       ino,
2325                         xfs_inode_t     *ip,
2326                         int             *num_illegal,
2327                         int             *need_dot,
2328                         ino_tree_node_t *irec,
2329                         int             ino_offset,
2330                         dir_hash_tab_t  *hashtab)
2331 {
2332         struct xfs_buf          **bplist;
2333         xfs_dablk_t             da_bno;
2334         freetab_t               *freetab;
2335         int                     num_bps;
2336         int                     i;
2337         int                     isblock;
2338         int                     isleaf;
2339         xfs_fileoff_t           next_da_bno;
2340         int                     seeval;
2341         int                     fixit = 0;
2342         xfs_dir2_db_t           db;
2343         struct xfs_da_args      args;
2344
2345         *need_dot = 1;
2346         freetab = malloc(FREETAB_SIZE(ip->i_d.di_size / mp->m_dir_geo->blksize));
2347         if (!freetab) {
2348                 do_error(_("malloc failed in %s (%" PRId64 " bytes)\n"),
2349                         __func__,
2350                         FREETAB_SIZE(ip->i_d.di_size / mp->m_dir_geo->blksize));
2351                 exit(1);
2352         }
2353         freetab->naents = ip->i_d.di_size / mp->m_dir_geo->blksize;
2354         freetab->nents = 0;
2355         for (i = 0; i < freetab->naents; i++) {
2356                 freetab->ents[i].v = NULLDATAOFF;
2357                 freetab->ents[i].s = 0;
2358         }
2359         num_bps = freetab->naents;
2360         bplist = calloc(num_bps, sizeof(struct xfs_buf*));
2361         if (!bplist)
2362                 do_error(_("calloc failed in %s (%zu bytes)\n"),
2363                         __func__, num_bps * sizeof(struct xfs_buf*));
2364
2365         /* is this a block, leaf, or node directory? */
2366         args.dp = ip;
2367         args.geo = mp->m_dir_geo;
2368         libxfs_dir2_isblock(&args, &isblock);
2369         libxfs_dir2_isleaf(&args, &isleaf);
2370
2371         /* check directory "data" blocks (ie. name/inode pairs) */
2372         for (da_bno = 0, next_da_bno = 0;
2373              next_da_bno != NULLFILEOFF && da_bno < mp->m_dir_geo->leafblk;
2374              da_bno = (xfs_dablk_t)next_da_bno) {
2375                 const struct xfs_buf_ops *ops;
2376                 int                      error;
2377                 struct xfs_dir2_data_hdr *d;
2378
2379                 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2380                 if (bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK)) {
2381                         /*
2382                          * if this is the first block, there isn't anything we
2383                          * can recover so we just trash it.
2384                          */
2385                          if (da_bno == 0) {
2386                                 fixit++;
2387                                 goto out_fix;
2388                         }
2389                         break;
2390                 }
2391
2392                 db = xfs_dir2_da_to_db(mp->m_dir_geo, da_bno);
2393                 if (db >= num_bps) {
2394                         /* more data blocks than expected */
2395                         num_bps = db + 1;
2396                         bplist = realloc(bplist, num_bps * sizeof(struct xfs_buf*));
2397                         if (!bplist)
2398                                 do_error(_("realloc failed in %s (%zu bytes)\n"),
2399                                         __func__,
2400                                         num_bps * sizeof(struct xfs_buf*));
2401                 }
2402
2403                 if (isblock)
2404                         ops = &xfs_dir3_block_buf_ops;
2405                 else
2406                         ops = &xfs_dir3_data_buf_ops;
2407
2408                 error = dir_read_buf(ip, da_bno, -1, &bplist[db], ops, &fixit);
2409                 if (error) {
2410                         do_warn(
2411         _("can't read data block %u for directory inode %" PRIu64 " error %d\n"),
2412                                 da_bno, ino, error);
2413                         *num_illegal += 1;
2414
2415                         /*
2416                          * we try to read all "data" blocks, but if we are in
2417                          * block form and we fail, there isn't anything else to
2418                          * read, and nothing we can do but trash it.
2419                          */
2420                         if (isblock) {
2421                                 fixit++;
2422                                 goto out_fix;
2423                         }
2424                         continue;
2425                 }
2426
2427                 /* check v5 metadata */
2428                 d = bplist[db]->b_addr;
2429                 if (be32_to_cpu(d->magic) == XFS_DIR3_BLOCK_MAGIC ||
2430                     be32_to_cpu(d->magic) == XFS_DIR3_DATA_MAGIC) {
2431                         struct xfs_buf           *bp = bplist[db];
2432
2433                         error = check_dir3_header(mp, bp, ino);
2434                         if (error) {
2435                                 fixit++;
2436                                 continue;
2437                         }
2438                 }
2439
2440                 longform_dir2_entry_check_data(mp, ip, num_illegal, need_dot,
2441                                 irec, ino_offset, &bplist[db], hashtab,
2442                                 &freetab, da_bno, isblock);
2443         }
2444         fixit |= (*num_illegal != 0) || dir2_is_badino(ino) || *need_dot;
2445
2446         if (!dotdot_update) {
2447                 /* check btree and freespace */
2448                 if (isblock) {
2449                         struct xfs_dir2_data_hdr *block;
2450                         xfs_dir2_block_tail_t   *btp;
2451                         xfs_dir2_leaf_entry_t   *blp;
2452
2453                         block = bplist[0]->b_addr;
2454                         btp = xfs_dir2_block_tail_p(mp->m_dir_geo, block);
2455                         blp = xfs_dir2_block_leaf_p(btp);
2456                         seeval = dir_hash_see_all(hashtab, blp,
2457                                                 be32_to_cpu(btp->count),
2458                                                 be32_to_cpu(btp->stale));
2459                         if (dir_hash_check(hashtab, ip, seeval))
2460                                 fixit |= 1;
2461                 } else if (isleaf) {
2462                         fixit |= longform_dir2_check_leaf(mp, ip, hashtab,
2463                                                                 freetab);
2464                 } else {
2465                         fixit |= longform_dir2_check_node(mp, ip, hashtab,
2466                                                                 freetab);
2467                 }
2468         }
2469 out_fix:
2470         if (!no_modify && (fixit || dotdot_update)) {
2471                 dir_hash_dup_names(hashtab);
2472                 for (i = 0; i < num_bps; i++)
2473                         if (bplist[i])
2474                                 libxfs_putbuf(bplist[i]);
2475                 longform_dir2_rebuild(mp, ino, ip, irec, ino_offset, hashtab);
2476                 *num_illegal = 0;
2477                 *need_dot = 0;
2478         } else {
2479                 for (i = 0; i < num_bps; i++)
2480                         if (bplist[i])
2481                                 libxfs_putbuf(bplist[i]);
2482         }
2483
2484         free(bplist);
2485         free(freetab);
2486 }
2487
2488 /*
2489  * shortform directory v2 processing routines -- entry verification and
2490  * bad entry deletion (pruning).
2491  */
2492 static struct xfs_dir2_sf_entry *
2493 shortform_dir2_junk(
2494         struct xfs_mount        *mp,
2495         struct xfs_dir2_sf_hdr  *sfp,
2496         struct xfs_dir2_sf_entry *sfep,
2497         xfs_ino_t               lino,
2498         int                     *max_size,
2499         int                     *index,
2500         int                     *bytes_deleted,
2501         int                     *ino_dirty)
2502 {
2503         struct xfs_dir2_sf_entry *next_sfep;
2504         int                     next_len;
2505         int                     next_elen;
2506
2507         if (lino == orphanage_ino)
2508                 orphanage_ino = 0;
2509
2510         next_elen = M_DIROPS(mp)->sf_entsize(sfp, sfep->namelen);
2511         next_sfep = M_DIROPS(mp)->sf_nextentry(sfp, sfep);
2512
2513         /*
2514          * if we are just checking, simply return the pointer to the next entry
2515          * here so that the checking loop can continue.
2516          */
2517         if (no_modify) {
2518                 do_warn(_("would junk entry\n"));
2519                 return next_sfep;
2520         }
2521
2522         /*
2523          * now move all the remaining entries down over the junked entry and
2524          * clear the newly unused bytes at the tail of the directory region.
2525          */
2526         next_len = *max_size - ((intptr_t)next_sfep - (intptr_t)sfp);
2527         *max_size -= next_elen;
2528         *bytes_deleted += next_elen;
2529
2530         memmove(sfep, next_sfep, next_len);
2531         memset((void *)((intptr_t)sfep + next_len), 0, next_elen);
2532         sfp->count -= 1;
2533         *ino_dirty = 1;
2534
2535         /*
2536          * WARNING:  drop the index i by one so it matches the decremented count
2537          * for accurate comparisons in the loop test
2538          */
2539         (*index)--;
2540
2541         if (verbose)
2542                 do_warn(_("junking entry\n"));
2543         else
2544                 do_warn("\n");
2545         return sfep;
2546 }
2547
2548 static void
2549 shortform_dir2_entry_check(xfs_mount_t  *mp,
2550                         xfs_ino_t       ino,
2551                         xfs_inode_t     *ip,
2552                         int             *ino_dirty,
2553                         ino_tree_node_t *current_irec,
2554                         int             current_ino_offset,
2555                         dir_hash_tab_t  *hashtab)
2556 {
2557         xfs_ino_t               lino;
2558         xfs_ino_t               parent;
2559         struct xfs_dir2_sf_hdr  *sfp;
2560         struct xfs_dir2_sf_entry *sfep;
2561         struct xfs_dir2_sf_entry *next_sfep;
2562         struct xfs_ifork        *ifp;
2563         struct ino_tree_node    *irec;
2564         int                     max_size;
2565         int                     ino_offset;
2566         int                     i;
2567         int                     bad_sfnamelen;
2568         int                     namelen;
2569         int                     bytes_deleted;
2570         char                    fname[MAXNAMELEN + 1];
2571         int                     i8;
2572
2573         ifp = &ip->i_df;
2574         sfp = (struct xfs_dir2_sf_hdr *) ifp->if_u1.if_data;
2575         *ino_dirty = 0;
2576         bytes_deleted = 0;
2577
2578         max_size = ifp->if_bytes;
2579         ASSERT(ip->i_d.di_size <= ifp->if_bytes);
2580
2581         /*
2582          * if just rebuild a directory due to a "..", update and return
2583          */
2584         if (dotdot_update) {
2585                 parent = get_inode_parent(current_irec, current_ino_offset);
2586                 if (no_modify) {
2587                         do_warn(
2588         _("would set .. in sf dir inode %" PRIu64 " to %" PRIu64 "\n"),
2589                                 ino, parent);
2590                 } else {
2591                         do_warn(
2592         _("setting .. in sf dir inode %" PRIu64 " to %" PRIu64 "\n"),
2593                                 ino, parent);
2594                         M_DIROPS(mp)->sf_put_parent_ino(sfp, parent);
2595                         *ino_dirty = 1;
2596                 }
2597                 return;
2598         }
2599
2600         /*
2601          * no '.' entry in shortform dirs, just bump up ref count by 1
2602          * '..' was already (or will be) accounted for and checked when
2603          * the directory is reached or will be taken care of when the
2604          * directory is moved to orphanage.
2605          */
2606         add_inode_ref(current_irec, current_ino_offset);
2607
2608         /*
2609          * Initialise i8 counter -- the parent inode number counts as well.
2610          */
2611         i8 = M_DIROPS(mp)->sf_get_parent_ino(sfp) > XFS_DIR2_MAX_SHORT_INUM;
2612
2613         /*
2614          * now run through entries, stop at first bad entry, don't need
2615          * to skip over '..' since that's encoded in its own field and
2616          * no need to worry about '.' since it doesn't exist.
2617          */
2618         sfep = next_sfep = xfs_dir2_sf_firstentry(sfp);
2619
2620         for (i = 0; i < sfp->count && max_size >
2621                                         (intptr_t)next_sfep - (intptr_t)sfp;
2622                         sfep = next_sfep, i++)  {
2623                 bad_sfnamelen = 0;
2624
2625                 lino = M_DIROPS(mp)->sf_get_ino(sfp, sfep);
2626
2627                 namelen = sfep->namelen;
2628
2629                 ASSERT(no_modify || namelen > 0);
2630
2631                 if (no_modify && namelen == 0)  {
2632                         /*
2633                          * if we're really lucky, this is
2634                          * the last entry in which case we
2635                          * can use the dir size to set the
2636                          * namelen value.  otherwise, forget
2637                          * it because we're not going to be
2638                          * able to find the next entry.
2639                          */
2640                         bad_sfnamelen = 1;
2641
2642                         if (i == sfp->count - 1)  {
2643                                 namelen = ip->i_d.di_size -
2644                                         ((intptr_t) &sfep->name[0] -
2645                                          (intptr_t) sfp);
2646                         } else  {
2647                                 /*
2648                                  * don't process the rest of the directory,
2649                                  * break out of processing loop
2650                                  */
2651                                 break;
2652                         }
2653                 } else if (no_modify && (intptr_t) sfep - (intptr_t) sfp +
2654                                 + M_DIROPS(mp)->sf_entsize(sfp, sfep->namelen)
2655                                 > ip->i_d.di_size)  {
2656                         bad_sfnamelen = 1;
2657
2658                         if (i == sfp->count - 1)  {
2659                                 namelen = ip->i_d.di_size -
2660                                         ((intptr_t) &sfep->name[0] -
2661                                          (intptr_t) sfp);
2662                         } else  {
2663                                 /*
2664                                  * don't process the rest of the directory,
2665                                  * break out of processing loop
2666                                  */
2667                                 break;
2668                         }
2669                 }
2670
2671                 memmove(fname, sfep->name, sfep->namelen);
2672                 fname[sfep->namelen] = '\0';
2673
2674                 ASSERT(no_modify || (lino != NULLFSINO && lino != 0));
2675                 ASSERT(no_modify || !verify_inum(mp, lino));
2676
2677                 /*
2678                  * Also skip entries with bogus inode numbers if we're
2679                  * in no modify mode.
2680                  */
2681
2682                 if (no_modify && verify_inum(mp, lino))  {
2683                         next_sfep = M_DIROPS(mp)->sf_nextentry(sfp, sfep);
2684                         continue;
2685                 }
2686
2687                 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, lino),
2688                                         XFS_INO_TO_AGINO(mp, lino));
2689
2690                 if (irec == NULL)  {
2691                         do_warn(
2692         _("entry \"%s\" in shortform directory %" PRIu64 " references non-existent inode %" PRIu64 "\n"),
2693                                 fname, ino, lino);
2694                         next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
2695                                                 &max_size, &i, &bytes_deleted,
2696                                                 ino_dirty);
2697                         continue;
2698                 }
2699
2700                 ino_offset = XFS_INO_TO_AGINO(mp, lino) - irec->ino_startnum;
2701
2702                 /*
2703                  * if it's a free inode, blow out the entry.
2704                  * by now, any inode that we think is free
2705                  * really is free.
2706                  */
2707                 if (is_inode_free(irec, ino_offset))  {
2708                         do_warn(
2709         _("entry \"%s\" in shortform directory inode %" PRIu64 " points to free inode %" PRIu64 "\n"),
2710                                 fname, ino, lino);
2711                         next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
2712                                                 &max_size, &i, &bytes_deleted,
2713                                                 ino_dirty);
2714                         continue;
2715                 }
2716                 /*
2717                  * check if this inode is lost+found dir in the root
2718                  */
2719                 if (ino == mp->m_sb.sb_rootino && strcmp(fname, ORPHANAGE) == 0) {
2720                         /*
2721                          * if it's not a directory, trash it
2722                          */
2723                         if (!inode_isadir(irec, ino_offset)) {
2724                                 do_warn(
2725         _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"),
2726                                         ORPHANAGE, lino, ino);
2727                                 next_sfep = shortform_dir2_junk(mp, sfp, sfep,
2728                                                 lino, &max_size, &i,
2729                                                 &bytes_deleted, ino_dirty);
2730                                 continue;
2731                         }
2732                         /*
2733                          * if this is a dup, it will be picked up below,
2734                          * otherwise, mark it as the orphanage for later.
2735                          */
2736                         if (!orphanage_ino)
2737                                 orphanage_ino = lino;
2738                 }
2739                 /*
2740                  * check for duplicate names in directory.
2741                  */
2742                 if (!dir_hash_add(mp, hashtab, (xfs_dir2_dataptr_t)
2743                                 (sfep - xfs_dir2_sf_firstentry(sfp)),
2744                                 lino, sfep->namelen, sfep->name,
2745                                 M_DIROPS(mp)->sf_get_ftype(sfep))) {
2746                         do_warn(
2747 _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"),
2748                                 fname, lino, ino);
2749                         next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
2750                                                 &max_size, &i, &bytes_deleted,
2751                                                 ino_dirty);
2752                         continue;
2753                 }
2754
2755                 if (!inode_isadir(irec, ino_offset))  {
2756                         /*
2757                          * check easy case first, regular inode, just bump
2758                          * the link count
2759                          */
2760                         add_inode_reached(irec, ino_offset);
2761                 } else  {
2762                         parent = get_inode_parent(irec, ino_offset);
2763
2764                         /*
2765                          * bump up the link counts in parent and child.
2766                          * directory but if the link doesn't agree with
2767                          * the .. in the child, blow out the entry
2768                          */
2769                         if (is_inode_reached(irec, ino_offset))  {
2770                                 do_warn(
2771         _("entry \"%s\" in directory inode %" PRIu64
2772           " references already connected inode %" PRIu64 ".\n"),
2773                                         fname, ino, lino);
2774                                 next_sfep = shortform_dir2_junk(mp, sfp, sfep,
2775                                                 lino, &max_size, &i,
2776                                                 &bytes_deleted, ino_dirty);
2777                                 continue;
2778                         } else if (parent == ino)  {
2779                                 add_inode_reached(irec, ino_offset);
2780                                 add_inode_ref(current_irec, current_ino_offset);
2781                         } else if (parent == NULLFSINO) {
2782                                 /* ".." was missing, but this entry refers to it,
2783                                 so, set it as the parent and mark for rebuild */
2784                                 do_warn(
2785         _("entry \"%s\" in dir ino %" PRIu64 " doesn't have a .. entry, will set it in ino %" PRIu64 ".\n"),
2786                                         fname, ino, lino);
2787                                 set_inode_parent(irec, ino_offset, ino);
2788                                 add_inode_reached(irec, ino_offset);
2789                                 add_inode_ref(current_irec, current_ino_offset);
2790                                 add_dotdot_update(XFS_INO_TO_AGNO(mp, lino),
2791                                                         irec, ino_offset);
2792                         } else  {
2793                                 do_warn(
2794         _("entry \"%s\" in directory inode %" PRIu64
2795           " not consistent with .. value (%" PRIu64
2796           ") in inode %" PRIu64 ",\n"),
2797                                         fname, ino, parent, lino);
2798                                 next_sfep = shortform_dir2_junk(mp, sfp, sfep,
2799                                                 lino, &max_size, &i,
2800                                                 &bytes_deleted, ino_dirty);
2801                                 continue;
2802                         }
2803                 }
2804
2805                 /* validate ftype field if supported */
2806                 if (xfs_sb_version_hasftype(&mp->m_sb)) {
2807                         uint8_t dir_ftype;
2808                         uint8_t ino_ftype;
2809
2810                         dir_ftype = M_DIROPS(mp)->sf_get_ftype(sfep);
2811                         ino_ftype = get_inode_ftype(irec, ino_offset);
2812
2813                         if (dir_ftype != ino_ftype) {
2814                                 if (no_modify) {
2815                                         do_warn(
2816         _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
2817                                                 dir_ftype, ino_ftype,
2818                                                 ino, lino);
2819                                 } else {
2820                                         do_warn(
2821         _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
2822                                                 dir_ftype, ino_ftype,
2823                                                 ino, lino);
2824                                         M_DIROPS(mp)->sf_put_ftype(sfep,
2825                                                                 ino_ftype);
2826                                         dir_hash_update_ftype(hashtab,
2827                         (xfs_dir2_dataptr_t)(sfep - xfs_dir2_sf_firstentry(sfp)),
2828                                                               ino_ftype);
2829                                         *ino_dirty = 1;
2830                                 }
2831                         }
2832                 }
2833
2834                 if (lino > XFS_DIR2_MAX_SHORT_INUM)
2835                         i8++;
2836
2837                 /*
2838                  * go onto next entry - we have to take entries with bad namelen
2839                  * into account in no modify mode since we calculate size based
2840                  * on next_sfep.
2841                  */
2842                 ASSERT(no_modify || bad_sfnamelen == 0);
2843                 next_sfep = (struct xfs_dir2_sf_entry *)((intptr_t)sfep +
2844                               (bad_sfnamelen
2845                                 ? M_DIROPS(mp)->sf_entsize(sfp, namelen)
2846                                 : M_DIROPS(mp)->sf_entsize(sfp, sfep->namelen)));
2847         }
2848
2849         if (sfp->i8count != i8) {
2850                 if (no_modify) {
2851                         do_warn(_("would fix i8count in inode %" PRIu64 "\n"),
2852                                 ino);
2853                 } else {
2854                         if (i8 == 0) {
2855                                 struct xfs_dir2_sf_entry *tmp_sfep;
2856
2857                                 tmp_sfep = next_sfep;
2858                                 process_sf_dir2_fixi8(mp, sfp, &tmp_sfep);
2859                                 bytes_deleted +=
2860                                         (intptr_t)next_sfep -
2861                                         (intptr_t)tmp_sfep;
2862                                 next_sfep = tmp_sfep;
2863                         } else
2864                                 sfp->i8count = i8;
2865                         *ino_dirty = 1;
2866                         do_warn(_("fixing i8count in inode %" PRIu64 "\n"),
2867                                 ino);
2868                 }
2869         }
2870
2871         /*
2872          * sync up sizes if required
2873          */
2874         if (*ino_dirty && bytes_deleted > 0)  {
2875                 ASSERT(!no_modify);
2876                 libxfs_idata_realloc(ip, -bytes_deleted, XFS_DATA_FORK);
2877                 ip->i_d.di_size -= bytes_deleted;
2878         }
2879
2880         if (ip->i_d.di_size != ip->i_df.if_bytes)  {
2881                 ASSERT(ip->i_df.if_bytes == (xfs_fsize_t)
2882                                 ((intptr_t) next_sfep - (intptr_t) sfp));
2883                 ip->i_d.di_size = (xfs_fsize_t)
2884                                 ((intptr_t) next_sfep - (intptr_t) sfp);
2885                 do_warn(
2886         _("setting size to %" PRId64 " bytes to reflect junked entries\n"),
2887                         ip->i_d.di_size);
2888                 *ino_dirty = 1;
2889         }
2890 }
2891
2892 /*
2893  * processes all reachable inodes in directories
2894  */
2895 static void
2896 process_dir_inode(
2897         xfs_mount_t             *mp,
2898         xfs_agnumber_t          agno,
2899         ino_tree_node_t         *irec,
2900         int                     ino_offset)
2901 {
2902         xfs_ino_t               ino;
2903         struct xfs_defer_ops            dfops;
2904         xfs_fsblock_t           first;
2905         xfs_inode_t             *ip;
2906         xfs_trans_t             *tp;
2907         dir_hash_tab_t          *hashtab;
2908         int                     need_dot;
2909         int                     dirty, num_illegal, error, nres;
2910
2911         ino = XFS_AGINO_TO_INO(mp, agno, irec->ino_startnum + ino_offset);
2912
2913         /*
2914          * open up directory inode, check all entries,
2915          * then call prune_dir_entries to remove all
2916          * remaining illegal directory entries.
2917          */
2918
2919         ASSERT(!is_inode_refchecked(irec, ino_offset) || dotdot_update);
2920
2921         error = -libxfs_iget(mp, NULL, ino, 0, &ip, &phase6_ifork_ops);
2922         if (error) {
2923                 if (!no_modify)
2924                         do_error(
2925         _("couldn't map inode %" PRIu64 ", err = %d\n"),
2926                                 ino, error);
2927                 else  {
2928                         do_warn(
2929         _("couldn't map inode %" PRIu64 ", err = %d\n"),
2930                                 ino, error);
2931                         /*
2932                          * see below for what we're doing if this
2933                          * is root.  Why do we need to do this here?
2934                          * to ensure that the root doesn't show up
2935                          * as being disconnected in the no_modify case.
2936                          */
2937                         if (mp->m_sb.sb_rootino == ino)  {
2938                                 add_inode_reached(irec, 0);
2939                                 add_inode_ref(irec, 0);
2940                         }
2941                 }
2942
2943                 add_inode_refchecked(irec, 0);
2944                 return;
2945         }
2946
2947         need_dot = dirty = num_illegal = 0;
2948
2949         if (mp->m_sb.sb_rootino == ino)  {
2950                 /*
2951                  * mark root inode reached and bump up
2952                  * link count for root inode to account
2953                  * for '..' entry since the root inode is
2954                  * never reached by a parent.  we know
2955                  * that root's '..' is always good --
2956                  * guaranteed by phase 3 and/or below.
2957                  */
2958                 add_inode_reached(irec, ino_offset);
2959         }
2960
2961         add_inode_refchecked(irec, ino_offset);
2962
2963         hashtab = dir_hash_init(ip->i_d.di_size);
2964
2965         /*
2966          * look for bogus entries
2967          */
2968         switch (ip->i_d.di_format)  {
2969                 case XFS_DINODE_FMT_EXTENTS:
2970                 case XFS_DINODE_FMT_BTREE:
2971                         /*
2972                          * also check for missing '.' in longform dirs.
2973                          * missing .. entries are added if required when
2974                          * the directory is connected to lost+found. but
2975                          * we need to create '.' entries here.
2976                          */
2977                         longform_dir2_entry_check(mp, ino, ip,
2978                                                 &num_illegal, &need_dot,
2979                                                 irec, ino_offset,
2980                                                 hashtab);
2981                         break;
2982
2983                 case XFS_DINODE_FMT_LOCAL:
2984                         /*
2985                          * using the remove reservation is overkill
2986                          * since at most we'll only need to log the
2987                          * inode but it's easier than wedging a
2988                          * new define in ourselves.
2989                          */
2990                         nres = no_modify ? 0 : XFS_REMOVE_SPACE_RES(mp);
2991                         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove,
2992                                                     nres, 0, 0, &tp);
2993                         if (error)
2994                                 res_failed(error);
2995
2996                         libxfs_trans_ijoin(tp, ip, 0);
2997
2998                         shortform_dir2_entry_check(mp, ino, ip, &dirty,
2999                                                 irec, ino_offset,
3000                                                 hashtab);
3001
3002                         ASSERT(dirty == 0 || (dirty && !no_modify));
3003                         if (dirty)  {
3004                                 libxfs_trans_log_inode(tp, ip,
3005                                         XFS_ILOG_CORE | XFS_ILOG_DDATA);
3006                                 libxfs_trans_commit(tp);
3007                         } else  {
3008                                 libxfs_trans_cancel(tp);
3009                         }
3010                         break;
3011
3012                 default:
3013                         break;
3014         }
3015         dir_hash_done(hashtab);
3016
3017         /*
3018          * if we have to create a .. for /, do it now *before*
3019          * we delete the bogus entries, otherwise the directory
3020          * could transform into a shortform dir which would
3021          * probably cause the simulation to choke.  Even
3022          * if the illegal entries get shifted around, it's ok
3023          * because the entries are structurally intact and in
3024          * in hash-value order so the simulation won't get confused
3025          * if it has to move them around.
3026          */
3027         if (!no_modify && need_root_dotdot && ino == mp->m_sb.sb_rootino)  {
3028                 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_LOCAL);
3029
3030                 do_warn(_("recreating root directory .. entry\n"));
3031
3032                 nres = XFS_MKDIR_SPACE_RES(mp, 2);
3033                 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir,
3034                                             nres, 0, 0, &tp);
3035                 if (error)
3036                         res_failed(error);
3037
3038                 libxfs_trans_ijoin(tp, ip, 0);
3039
3040                 libxfs_defer_init(&dfops, &first);
3041
3042                 error = -libxfs_dir_createname(tp, ip, &xfs_name_dotdot,
3043                                         ip->i_ino, &first, &dfops, nres);
3044                 if (error)
3045                         do_error(
3046         _("can't make \"..\" entry in root inode %" PRIu64 ", createname error %d\n"), ino, error);
3047
3048                 libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
3049
3050                 libxfs_defer_ijoin(&dfops, ip);
3051                 error = -libxfs_defer_finish(&tp, &dfops);
3052                 ASSERT(error == 0);
3053                 libxfs_trans_commit(tp);
3054
3055                 need_root_dotdot = 0;
3056         } else if (need_root_dotdot && ino == mp->m_sb.sb_rootino)  {
3057                 do_warn(_("would recreate root directory .. entry\n"));
3058         }
3059
3060         /*
3061          * if we need to create the '.' entry, do so only if
3062          * the directory is a longform dir.  if it's been
3063          * turned into a shortform dir, then the inode is ok
3064          * since shortform dirs have no '.' entry and the inode
3065          * has already been committed by prune_lf_dir_entry().
3066          */
3067         if (need_dot)  {
3068                 /*
3069                  * bump up our link count but don't
3070                  * bump up the inode link count.  chances
3071                  * are good that even though we lost '.'
3072                  * the inode link counts reflect '.' so
3073                  * leave the inode link count alone and if
3074                  * it turns out to be wrong, we'll catch
3075                  * that in phase 7.
3076                  */
3077                 add_inode_ref(irec, ino_offset);
3078
3079                 if (no_modify)  {
3080                         do_warn(
3081         _("would create missing \".\" entry in dir ino %" PRIu64 "\n"),
3082                                 ino);
3083                 } else if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)  {
3084                         /*
3085                          * need to create . entry in longform dir.
3086                          */
3087                         do_warn(
3088         _("creating missing \".\" entry in dir ino %" PRIu64 "\n"), ino);
3089
3090                         nres = XFS_MKDIR_SPACE_RES(mp, 1);
3091                         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir,
3092                                                     nres, 0, 0, &tp);
3093                         if (error)
3094                                 res_failed(error);
3095
3096                         libxfs_trans_ijoin(tp, ip, 0);
3097
3098                         libxfs_defer_init(&dfops, &first);
3099
3100                         error = -libxfs_dir_createname(tp, ip, &xfs_name_dot,
3101                                         ip->i_ino, &first, &dfops, nres);
3102                         if (error)
3103                                 do_error(
3104         _("can't make \".\" entry in dir ino %" PRIu64 ", createname error %d\n"),
3105                                         ino, error);
3106
3107                         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
3108
3109                         libxfs_defer_ijoin(&dfops, ip);
3110                         error = -libxfs_defer_finish(&tp, &dfops);
3111                         ASSERT(error == 0);
3112                         libxfs_trans_commit(tp);
3113                 }
3114         }
3115         IRELE(ip);
3116 }
3117
3118 /*
3119  * mark realtime bitmap and summary inodes as reached.
3120  * quota inode will be marked here as well
3121  */
3122 static void
3123 mark_standalone_inodes(xfs_mount_t *mp)
3124 {
3125         ino_tree_node_t         *irec;
3126         int                     offset;
3127
3128         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rbmino),
3129                         XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rbmino));
3130
3131         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rbmino) -
3132                         irec->ino_startnum;
3133
3134         add_inode_reached(irec, offset);
3135
3136         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rsumino),
3137                         XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rsumino));
3138
3139         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rsumino) -
3140                         irec->ino_startnum;
3141
3142         add_inode_reached(irec, offset);
3143
3144         if (fs_quotas)  {
3145                 if (mp->m_sb.sb_uquotino
3146                                 && mp->m_sb.sb_uquotino != NULLFSINO)  {
3147                         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
3148                                                 mp->m_sb.sb_uquotino),
3149                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino));
3150                         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino)
3151                                         - irec->ino_startnum;
3152                         add_inode_reached(irec, offset);
3153                 }
3154                 if (mp->m_sb.sb_gquotino
3155                                 && mp->m_sb.sb_gquotino != NULLFSINO)  {
3156                         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
3157                                                 mp->m_sb.sb_gquotino),
3158                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_gquotino));
3159                         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_gquotino)
3160                                         - irec->ino_startnum;
3161                         add_inode_reached(irec, offset);
3162                 }
3163                 if (mp->m_sb.sb_pquotino
3164                                 && mp->m_sb.sb_pquotino != NULLFSINO)  {
3165                         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
3166                                                 mp->m_sb.sb_pquotino),
3167                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino));
3168                         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino)
3169                                         - irec->ino_startnum;
3170                         add_inode_reached(irec, offset);
3171                 }
3172         }
3173 }
3174
3175 static void
3176 check_for_orphaned_inodes(
3177         xfs_mount_t             *mp,
3178         xfs_agnumber_t          agno,
3179         ino_tree_node_t         *irec)
3180 {
3181         int                     i;
3182         xfs_ino_t               ino;
3183
3184         for (i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
3185                 ASSERT(is_inode_confirmed(irec, i));
3186                 if (is_inode_free(irec, i))
3187                         continue;
3188
3189                 if (is_inode_reached(irec, i))
3190                         continue;
3191
3192                 ASSERT(inode_isadir(irec, i) ||
3193                         num_inode_references(irec, i) == 0);
3194
3195                 ino = XFS_AGINO_TO_INO(mp, agno, i + irec->ino_startnum);
3196                 if (inode_isadir(irec, i))
3197                         do_warn(_("disconnected dir inode %" PRIu64 ", "), ino);
3198                 else
3199                         do_warn(_("disconnected inode %" PRIu64 ", "), ino);
3200                 if (!no_modify)  {
3201                         if (!orphanage_ino)
3202                                 orphanage_ino = mk_orphanage(mp);
3203                         do_warn(_("moving to %s\n"), ORPHANAGE);
3204                         mv_orphanage(mp, ino, inode_isadir(irec, i));
3205                 } else  {
3206                         do_warn(_("would move to %s\n"), ORPHANAGE);
3207                 }
3208                 /*
3209                  * for read-only case, even though the inode isn't
3210                  * really reachable, set the flag (and bump our link
3211                  * count) anyway to fool phase 7
3212                  */
3213                 add_inode_reached(irec, i);
3214         }
3215 }
3216
3217 static void
3218 traverse_function(
3219         struct workqueue        *wq,
3220         xfs_agnumber_t          agno,
3221         void                    *arg)
3222 {
3223         ino_tree_node_t         *irec;
3224         int                     i;
3225         prefetch_args_t         *pf_args = arg;
3226
3227         wait_for_inode_prefetch(pf_args);
3228
3229         if (verbose)
3230                 do_log(_("        - agno = %d\n"), agno);
3231
3232         for (irec = findfirst_inode_rec(agno); irec; irec = next_ino_rec(irec)) {
3233                 if (irec->ino_isa_dir == 0)
3234                         continue;
3235
3236                 if (pf_args) {
3237                         sem_post(&pf_args->ra_count);
3238 #ifdef XR_PF_TRACE
3239                         sem_getvalue(&pf_args->ra_count, &i);
3240                         pftrace(
3241                 "processing inode chunk %p in AG %d (sem count = %d)",
3242                                 irec, agno, i);
3243 #endif
3244                 }
3245
3246                 for (i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
3247                         if (inode_isadir(irec, i))
3248                                 process_dir_inode(wq->wq_ctx, agno, irec, i);
3249                 }
3250         }
3251         cleanup_inode_prefetch(pf_args);
3252 }
3253
3254 static void
3255 update_missing_dotdot_entries(
3256         xfs_mount_t             *mp)
3257 {
3258         dotdot_update_t         *dir;
3259
3260         /*
3261          * these entries parents were updated, rebuild them again
3262          * set dotdot_update flag so processing routines do not count links
3263          */
3264         dotdot_update = 1;
3265         while (!list_empty(&dotdot_update_list)) {
3266                 dir = list_entry(dotdot_update_list.prev, struct dotdot_update,
3267                                  list);
3268                 list_del(&dir->list);
3269                 process_dir_inode(mp, dir->agno, dir->irec, dir->ino_offset);
3270                 free(dir);
3271         }
3272 }
3273
3274 static void
3275 traverse_ags(
3276         struct xfs_mount        *mp)
3277 {
3278         do_inode_prefetch(mp, 0, traverse_function, false, true);
3279 }
3280
3281 void
3282 phase6(xfs_mount_t *mp)
3283 {
3284         ino_tree_node_t         *irec;
3285         int                     i;
3286
3287         memset(&zerocr, 0, sizeof(struct cred));
3288         memset(&zerofsx, 0, sizeof(struct fsxattr));
3289         orphanage_ino = 0;
3290
3291         do_log(_("Phase 6 - check inode connectivity...\n"));
3292
3293         incore_ext_teardown(mp);
3294
3295         add_ino_ex_data(mp);
3296
3297         /*
3298          * verify existence of root directory - if we have to
3299          * make one, it's ok for the incore data structs not to
3300          * know about it since everything about it (and the other
3301          * inodes in its chunk if a new chunk was created) are ok
3302          */
3303         if (need_root_inode)  {
3304                 if (!no_modify)  {
3305                         do_warn(_("reinitializing root directory\n"));
3306                         mk_root_dir(mp);
3307                         need_root_inode = 0;
3308                         need_root_dotdot = 0;
3309                 } else  {
3310                         do_warn(_("would reinitialize root directory\n"));
3311                 }
3312         }
3313
3314         if (need_rbmino)  {
3315                 if (!no_modify)  {
3316                         do_warn(_("reinitializing realtime bitmap inode\n"));
3317                         mk_rbmino(mp);
3318                         need_rbmino = 0;
3319                 } else  {
3320                         do_warn(_("would reinitialize realtime bitmap inode\n"));
3321                 }
3322         }
3323
3324         if (need_rsumino)  {
3325                 if (!no_modify)  {
3326                         do_warn(_("reinitializing realtime summary inode\n"));
3327                         mk_rsumino(mp);
3328                         need_rsumino = 0;
3329                 } else  {
3330                         do_warn(_("would reinitialize realtime summary inode\n"));
3331                 }
3332         }
3333
3334         if (!no_modify)  {
3335                 do_log(
3336 _("        - resetting contents of realtime bitmap and summary inodes\n"));
3337                 if (fill_rbmino(mp))  {
3338                         do_warn(
3339                         _("Warning:  realtime bitmap may be inconsistent\n"));
3340                 }
3341
3342                 if (fill_rsumino(mp))  {
3343                         do_warn(
3344                         _("Warning:  realtime bitmap may be inconsistent\n"));
3345                 }
3346         }
3347
3348         mark_standalone_inodes(mp);
3349
3350         do_log(_("        - traversing filesystem ...\n"));
3351
3352         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
3353                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
3354
3355         /*
3356          * we always have a root inode, even if it's free...
3357          * if the root is free, forget it, lost+found is already gone
3358          */
3359         if (is_inode_free(irec, 0) || !inode_isadir(irec, 0))  {
3360                 need_root_inode = 1;
3361         }
3362
3363         /*
3364          * then process all inodes by walking incore inode tree
3365          */
3366         traverse_ags(mp);
3367
3368         /*
3369          * any directories that had updated ".." entries, rebuild them now
3370          */
3371         update_missing_dotdot_entries(mp);
3372
3373         do_log(_("        - traversal finished ...\n"));
3374         do_log(_("        - moving disconnected inodes to %s ...\n"),
3375                 ORPHANAGE);
3376
3377         /*
3378          * move all disconnected inodes to the orphanage
3379          */
3380         for (i = 0; i < glob_agcount; i++)  {
3381                 irec = findfirst_inode_rec(i);
3382                 while (irec != NULL)  {
3383                         check_for_orphaned_inodes(mp, i, irec);
3384                         irec = next_ino_rec(irec);
3385                 }
3386         }
3387 }