repair/phase6.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
   4  * All Rights Reserved.
   5  */
   6
   7 #include "libxfs.h"
   8 #include "threads.h"
   9 #include "threads.h"
  10 #include "prefetch.h"
  11 #include "avl.h"
  12 #include "globals.h"
  13 #include "agheader.h"
  14 #include "incore.h"
  15 #include "dir2.h"
  16 #include "protos.h"
  17 #include "err_protos.h"
  18 #include "dinode.h"
  19 #include "progress.h"
  20 #include "versions.h"
  21
/*
 * File-scope templates with static storage (and therefore zero-filled).
 * Both are handed to libxfs_dir_ialloc() when the orphanage is created.
 */
static struct cred              zerocr;
static struct fsxattr           zerofsx;
/* NOTE(review): presumably the inode number of lost+found -- confirm. */
static xfs_ino_t                orphanage_ino;

/*
 * Name descriptor for ".": initialised with the string, the value 1 and
 * XFS_DIR3_FT_DIR (presumably name, length and file type, in that order).
 */
static struct xfs_name          xfs_name_dot = {(unsigned char *)".",
                                                1,
                                                XFS_DIR3_FT_DIR};
  29
/*
 * Data structures used to keep track of directories where the ".."
 * entries are updated. These must be rebuilt after the initial pass.
 *
 * One record is queued per directory by add_dotdot_update().
 */
typedef struct dotdot_update {
        struct list_head        list;           /* link in dotdot_update_list */
        ino_tree_node_t         *irec;          /* in-core inode record */
        xfs_agnumber_t          agno;           /* AG number given by the caller */
        int                     ino_offset;     /* offset given by the caller */
} dotdot_update_t;

/* Head of the list that add_dotdot_update() inserts into. */
static LIST_HEAD(dotdot_update_list);
/*
 * Shares its identifier with the struct tag above; that is legal because
 * struct tags live in their own namespace.
 * NOTE(review): its use is outside this part of the file -- confirm meaning.
 */
static int                      dotdot_update;
  43
  44 static void
  45 add_dotdot_update(
  46         xfs_agnumber_t          agno,
  47         ino_tree_node_t         *irec,
  48         int                     ino_offset)
  49 {
  50         dotdot_update_t         *dir = malloc(sizeof(dotdot_update_t));
  51
  52         if (!dir)
  53                 do_error(_("malloc failed add_dotdot_update (%zu bytes)\n"),
  54                         sizeof(dotdot_update_t));
  55
  56         INIT_LIST_HEAD(&dir->list);
  57         dir->irec = irec;
  58         dir->agno = agno;
  59         dir->ino_offset = ino_offset;
  60
  61         list_add(&dir->list, &dotdot_update_list);
  62 }
  63
/*
 * Data structures and routines to keep track of directory entries
 * and whether their leaf entry has been seen. Also used for name
 * duplicate checking and rebuilding step if required.
 *
 * Each entry is allocated in one piece together with a private copy of its
 * name, which is stored in the trailing namebuf[] array.
 */
struct dir_hash_ent {
        struct dir_hash_ent     *nextbyhash;    /* next in name bucket */
        struct dir_hash_ent     *nextbyorder;   /* next in order added */
        xfs_dahash_t            hashval;        /* hash value of name */
        uint32_t                address;        /* offset of data entry */
        xfs_ino_t               inum;           /* inode num of entry */
        /*
         * Set when the name starts with '/' or duplicates a name already in
         * the table; such entries are not linked into a name bucket.
         */
        short                   junkit;
        short                   seen;           /* have seen leaf entry */
        struct xfs_name         name;           /* name.name points at namebuf */
        unsigned char           namebuf[];      /* private copy of the name */
};
  80
/*
 * Per-directory table of dir_hash_ent records.  Entries can be reached
 * through the name hash buckets, through the first/last chain in the order
 * they were added, and through the byaddr radix tree indexed by address.
 */
struct dir_hash_tab {
        int                     size;           /* size of hash tables */
        struct dir_hash_ent     *first;         /* ptr to first added entry */
        struct dir_hash_ent     *last;          /* ptr to last added entry */
        struct dir_hash_ent     **byhash;       /* ptr to name hash buckets */
/* Radix tree tag kept on an address until dir_hash_see() clears it. */
#define HT_UNSEEN               1
        struct radix_tree_root  byaddr;         /* entries indexed by address */
};

/* Bytes needed for the table header followed by n bucket pointers. */
#define DIR_HASH_TAB_SIZE(n)    \
        (sizeof(struct dir_hash_tab) + (sizeof(struct dir_hash_ent *) * (n)))
/* Map hash value a onto a bucket index of table t. */
#define DIR_HASH_FUNC(t,a)      ((a) % (t)->size)
  93
/*
 * Track the contents of the freespace table in a directory.
 *
 * ents[] is declared with a single element, but FREETAB_SIZE(n) computes the
 * byte size of a table holding n entries, so the structure is evidently
 * meant to be allocated larger than sizeof(freetab_t).
 */
typedef struct freetab {
        int                     naents; /* expected number of data blocks */
        int                     nents;  /* number of data blocks processed */
        struct freetab_ent {
                /* NOTE(review): meaning of v and s is not shown here -- confirm. */
                xfs_dir2_data_off_t     v;
                short                   s;
        } ents[1];
} freetab_t;
/* Size in bytes of a freetab_t with room for n entries. */
#define FREETAB_SIZE(n) \
        (offsetof(freetab_t, ents) + (sizeof(struct freetab_ent) * (n)))
 107
/*
 * Result codes of the directory hash checks.  dir_hash_check() also uses
 * them as indices into its table of descriptive strings, which is sized by
 * DIR_HASH_CK_TOTAL.
 */
#define DIR_HASH_CK_OK          0       /* "ok" */
#define DIR_HASH_CK_DUPLEAF     1       /* "duplicate leaf" */
#define DIR_HASH_CK_BADHASH     2       /* "hash value mismatch" */
#define DIR_HASH_CK_NODATA      3       /* "no data entry" */
#define DIR_HASH_CK_NOLEAF      4       /* "no leaf entry" */
#define DIR_HASH_CK_BADSTALE    5       /* "bad stale count" */
#define DIR_HASH_CK_TOTAL       6       /* number of codes above */
 115
 116 /*
 117  * Need to handle CRC and validation errors specially here. If there is a
 118  * validator error, re-read without the verifier so that we get a buffer we can
 119  * check and repair. Re-attach the ops to the buffer after the read so that when
 120  * it is rewritten the CRC is recalculated.
 121  *
 122  * If the buffer was not read, we return an error. If the buffer was read but
 123  * had a CRC or corruption error, we reread it without the verifier and if it is
 124  * read successfully we increment *crc_error and return 0. Otherwise we
 125  * return the read error.
 126  */
 127 static int
 128 dir_read_buf(
 129         struct xfs_inode        *ip,
 130         xfs_dablk_t             bno,
 131         struct xfs_buf          **bpp,
 132         const struct xfs_buf_ops *ops,
 133         int                     *crc_error)
 134 {
 135         int error;
 136         int error2;
 137
 138         error = -libxfs_da_read_buf(NULL, ip, bno, 0, bpp, XFS_DATA_FORK, ops);
 139
 140         if (error != EFSBADCRC && error != EFSCORRUPTED)
 141                 return error;
 142
 143         error2 = -libxfs_da_read_buf(NULL, ip, bno, 0, bpp, XFS_DATA_FORK,
 144                         NULL);
 145         if (error2)
 146                 return error2;
 147
 148         (*crc_error)++;
 149         (*bpp)->b_ops = ops;
 150         return 0;
 151 }
 152
 153 /*
 154  * Returns 0 if the name already exists (ie. a duplicate)
 155  */
 156 static int
 157 dir_hash_add(
 158         struct xfs_mount        *mp,
 159         struct dir_hash_tab     *hashtab,
 160         uint32_t                addr,
 161         xfs_ino_t               inum,
 162         int                     namelen,
 163         unsigned char           *name,
 164         uint8_t                 ftype)
 165 {
 166         xfs_dahash_t            hash = 0;
 167         int                     byhash = 0;
 168         struct dir_hash_ent     *p;
 169         int                     dup;
 170         short                   junk;
 171         struct xfs_name         xname;
 172         int                     error;
 173
 174         xname.name = name;
 175         xname.len = namelen;
 176         xname.type = ftype;
 177
 178         junk = name[0] == '/';
 179         dup = 0;
 180
 181         if (!junk) {
 182                 hash = libxfs_dir2_hashname(mp, &xname);
 183                 byhash = DIR_HASH_FUNC(hashtab, hash);
 184
 185                 /*
 186                  * search hash bucket for existing name.
 187                  */
 188                 for (p = hashtab->byhash[byhash]; p; p = p->nextbyhash) {
 189                         if (p->hashval == hash && p->name.len == namelen) {
 190                                 if (memcmp(p->name.name, name, namelen) == 0) {
 191                                         dup = 1;
 192                                         junk = 1;
 193                                         break;
 194                                 }
 195                         }
 196                 }
 197         }
 198
 199         /*
 200          * Allocate enough space for the hash entry and the name in a single
 201          * allocation so we can store our own copy of the name for later use.
 202          */
 203         p = calloc(1, sizeof(*p) + namelen + 1);
 204         if (!p)
 205                 do_error(_("malloc failed in dir_hash_add (%zu bytes)\n"),
 206                         sizeof(*p));
 207
 208         error = radix_tree_insert(&hashtab->byaddr, addr, p);
 209         if (error == EEXIST) {
 210                 do_warn(_("duplicate addrs %u in directory!\n"), addr);
 211                 free(p);
 212                 return 0;
 213         }
 214         radix_tree_tag_set(&hashtab->byaddr, addr, HT_UNSEEN);
 215
 216         if (hashtab->last)
 217                 hashtab->last->nextbyorder = p;
 218         else
 219                 hashtab->first = p;
 220         p->nextbyorder = NULL;
 221         hashtab->last = p;
 222
 223         if (!(p->junkit = junk)) {
 224                 p->hashval = hash;
 225                 p->nextbyhash = hashtab->byhash[byhash];
 226                 hashtab->byhash[byhash] = p;
 227         }
 228         p->address = addr;
 229         p->inum = inum;
 230         p->seen = 0;
 231
 232         /* Set up the name in the region trailing the hash entry. */
 233         memcpy(p->namebuf, name, namelen);
 234         p->name.name = p->namebuf;
 235         p->name.len = namelen;
 236         p->name.type = ftype;
 237         return !dup;
 238 }
 239
 240 static int
 241 dir_hash_check(
 242         struct dir_hash_tab     *hashtab,
 243         struct xfs_inode        *ip,
 244         int                     seeval)
 245 {
 246         static char             *seevalstr[DIR_HASH_CK_TOTAL];
 247         static int              done;
 248
 249         if (!done) {
 250                 seevalstr[DIR_HASH_CK_OK] = _("ok");
 251                 seevalstr[DIR_HASH_CK_DUPLEAF] = _("duplicate leaf");
 252                 seevalstr[DIR_HASH_CK_BADHASH] = _("hash value mismatch");
 253                 seevalstr[DIR_HASH_CK_NODATA] = _("no data entry");
 254                 seevalstr[DIR_HASH_CK_NOLEAF] = _("no leaf entry");
 255                 seevalstr[DIR_HASH_CK_BADSTALE] = _("bad stale count");
 256                 done = 1;
 257         }
 258
 259         if (seeval == DIR_HASH_CK_OK &&
 260             radix_tree_tagged(&hashtab->byaddr, HT_UNSEEN))
 261                 seeval = DIR_HASH_CK_NOLEAF;
 262         if (seeval == DIR_HASH_CK_OK)
 263                 return 0;
 264         do_warn(_("bad hash table for directory inode %" PRIu64 " (%s): "),
 265                 ip->i_ino, seevalstr[seeval]);
 266         if (!no_modify)
 267                 do_warn(_("rebuilding\n"));
 268         else
 269                 do_warn(_("would rebuild\n"));
 270         return 1;
 271 }
 272
 273 static void
 274 dir_hash_done(
 275         struct dir_hash_tab     *hashtab)
 276 {
 277         int                     i;
 278         struct dir_hash_ent     *n;
 279         struct dir_hash_ent     *p;
 280
 281         for (i = 0; i < hashtab->size; i++) {
 282                 for (p = hashtab->byhash[i]; p; p = n) {
 283                         n = p->nextbyhash;
 284                         radix_tree_delete(&hashtab->byaddr, p->address);
 285                         free(p);
 286                 }
 287         }
 288         free(hashtab);
 289 }
 290
 291 static struct dir_hash_tab *
 292 dir_hash_init(
 293         xfs_fsize_t             size)
 294 {
 295         struct dir_hash_tab     *hashtab;
 296         int                     hsize;
 297
 298         hsize = size / (16 * 4);
 299         if (hsize > 65536)
 300                 hsize = 63336;
 301         else if (hsize < 16)
 302                 hsize = 16;
 303         if ((hashtab = calloc(DIR_HASH_TAB_SIZE(hsize), 1)) == NULL)
 304                 do_error(_("calloc failed in dir_hash_init\n"));
 305         hashtab->size = hsize;
 306         hashtab->byhash = (struct dir_hash_ent **)((char *)hashtab +
 307                 sizeof(struct dir_hash_tab));
 308         INIT_RADIX_TREE(&hashtab->byaddr, 0);
 309         return hashtab;
 310 }
 311
 312 static int
 313 dir_hash_see(
 314         struct dir_hash_tab     *hashtab,
 315         xfs_dahash_t            hash,
 316         xfs_dir2_dataptr_t      addr)
 317 {
 318         struct dir_hash_ent     *p;
 319
 320         p = radix_tree_lookup(&hashtab->byaddr, addr);
 321         if (!p)
 322                 return DIR_HASH_CK_NODATA;
 323         if (!radix_tree_tag_get(&hashtab->byaddr, addr, HT_UNSEEN))
 324                 return DIR_HASH_CK_DUPLEAF;
 325         if (p->junkit == 0 && p->hashval != hash)
 326                 return DIR_HASH_CK_BADHASH;
 327         radix_tree_tag_clear(&hashtab->byaddr, addr, HT_UNSEEN);
 328         return DIR_HASH_CK_OK;
 329 }
 330
 331 static void
 332 dir_hash_update_ftype(
 333         struct dir_hash_tab     *hashtab,
 334         xfs_dir2_dataptr_t      addr,
 335         uint8_t                 ftype)
 336 {
 337         struct dir_hash_ent     *p;
 338
 339         p = radix_tree_lookup(&hashtab->byaddr, addr);
 340         if (!p)
 341                 return;
 342         p->name.type = ftype;
 343 }
 344
 345 /*
 346  * checks to make sure leafs match a data entry, and that the stale
 347  * count is valid.
 348  */
 349 static int
 350 dir_hash_see_all(
 351         struct dir_hash_tab     *hashtab,
 352         xfs_dir2_leaf_entry_t   *ents,
 353         int                     count,
 354         int                     stale)
 355 {
 356         int                     i;
 357         int                     j;
 358         int                     rval;
 359
 360         for (i = j = 0; i < count; i++) {
 361                 if (be32_to_cpu(ents[i].address) == XFS_DIR2_NULL_DATAPTR) {
 362                         j++;
 363                         continue;
 364                 }
 365                 rval = dir_hash_see(hashtab, be32_to_cpu(ents[i].hashval),
 366                                         be32_to_cpu(ents[i].address));
 367                 if (rval != DIR_HASH_CK_OK)
 368                         return rval;
 369         }
 370         return j == stale ? DIR_HASH_CK_OK : DIR_HASH_CK_BADSTALE;
 371 }
 372
 373 /*
 374  * Given a block number in a fork, return the next valid block number (not a
 375  * hole).  If this is the last block number then NULLFILEOFF is returned.
 376  */
 377 static int
 378 bmap_next_offset(
 379         struct xfs_inode        *ip,
 380         xfs_fileoff_t           *bnop)
 381 {
 382         xfs_fileoff_t           bno;
 383         int                     error;
 384         struct xfs_bmbt_irec    got;
 385         struct xfs_iext_cursor  icur;
 386
 387         switch (ip->i_df.if_format) {
 388         case XFS_DINODE_FMT_LOCAL:
 389                 *bnop = NULLFILEOFF;
 390                 return 0;
 391         case XFS_DINODE_FMT_BTREE:
 392         case XFS_DINODE_FMT_EXTENTS:
 393                 break;
 394         default:
 395                 return EIO;
 396         }
 397
 398         if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) {
 399                 error = -libxfs_iread_extents(NULL, ip, XFS_DATA_FORK);
 400                 if (error)
 401                         return error;
 402         }
 403
 404         bno = *bnop + 1;
 405         if (!libxfs_iext_lookup_extent(ip, &ip->i_df, bno, &icur, &got))
 406                 *bnop = NULLFILEOFF;
 407         else
 408                 *bnop = got.br_startoff < bno ? bno : got.br_startoff;
 409         return 0;
 410 }
 411
 412 static void
 413 res_failed(
 414         int     err)
 415 {
 416         if (err == ENOSPC) {
 417                 do_error(_("ran out of disk space!\n"));
 418         } else
 419                 do_error(_("xfs_trans_reserve returned %d\n"), err);
 420 }
 421
/*
 * Re-create the realtime bitmap inode (sb_rbmino).
 *
 * The inode core is zeroed and set up as a regular file in extents format
 * with a link count of 1 and a size of sb_rbmblocks filesystem blocks.  Once
 * that is committed, a second transaction maps blocks for the whole file and
 * every mapped extent is zeroed on the data device.  Failures are reported
 * through res_failed() or do_error().
 */
static void
mk_rbmino(xfs_mount_t *mp)
{
        xfs_trans_t     *tp;
        xfs_inode_t     *ip;
        xfs_bmbt_irec_t *ep;
        int             i;
        int             nmap;
        int             error;
        xfs_fileoff_t   bno;
        xfs_bmbt_irec_t map[XFS_BMAP_MAX_NMAP];
        int             times;
        uint            blocks;

        /*
         * first set up inode
         */
        i = -libxfs_trans_alloc_rollable(mp, 10, &tp);
        if (i)
                res_failed(i);

        error = -libxfs_iget(mp, tp, mp->m_sb.sb_rbmino, 0, &ip);
        if (error) {
                do_error(
                _("couldn't iget realtime bitmap inode -- error - %d\n"),
                        error);
        }

        /* Start from an all-zero inode core. */
        memset(&ip->i_d, 0, sizeof(ip->i_d));

        VFS_I(ip)->i_mode = S_IFREG;
        ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
        if (ip->i_afp)
                ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;

        set_nlink(VFS_I(ip), 1);        /* account for sb ptr */

        /* v3 inodes additionally get a version and a creation timestamp. */
        times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
        if (xfs_sb_version_has_v3inode(&mp->m_sb)) {
                VFS_I(ip)->i_version = 1;
                ip->i_d.di_flags2 = 0;
                times |= XFS_ICHGTIME_CREATE;
        }
        libxfs_trans_ichgtime(tp, ip, times);

        /*
         * now the ifork
         */
        ip->i_df.if_flags = XFS_IFEXTENTS;
        ip->i_df.if_bytes = 0;
        ip->i_df.if_u1.if_root = NULL;

        /*
         * NOTE(review): if both operands are 32-bit the product may wrap
         * before it is widened for di_size -- confirm the field types.
         */
        ip->i_d.di_size = mp->m_sb.sb_rbmblocks * mp->m_sb.sb_blocksize;

        /*
         * commit changes
         */
        libxfs_trans_ijoin(tp, ip, 0);
        libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
        error = -libxfs_trans_commit(tp);
        if (error)
                do_error(_("%s: commit failed, error %d\n"), __func__, error);

        /*
         * then allocate blocks for file and fill with zeroes (stolen
         * from mkfs)
         */
        blocks = mp->m_sb.sb_rbmblocks +
                        XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1;
        error = -libxfs_trans_alloc_rollable(mp, blocks, &tp);
        if (error)
                res_failed(error);

        libxfs_trans_ijoin(tp, ip, 0);
        bno = 0;
        /* Keep mapping until the whole file offset range is covered. */
        while (bno < mp->m_sb.sb_rbmblocks) {
                nmap = XFS_BMAP_MAX_NMAP;
                error = -libxfs_bmapi_write(tp, ip, bno,
                          (xfs_extlen_t)(mp->m_sb.sb_rbmblocks - bno),
                          0, mp->m_sb.sb_rbmblocks, map, &nmap);
                if (error) {
                        do_error(
                        _("couldn't allocate realtime bitmap, error = %d\n"),
                                error);
                }
                /* Zero each returned extent and advance past it. */
                for (i = 0, ep = map; i < nmap; i++, ep++) {
                        libxfs_device_zero(mp->m_ddev_targp,
                                XFS_FSB_TO_DADDR(mp, ep->br_startblock),
                                XFS_FSB_TO_BB(mp, ep->br_blockcount));
                        bno += ep->br_blockcount;
                }
        }
        error = -libxfs_trans_commit(tp);
        if (error) {
                do_error(
                _("allocation of the realtime bitmap failed, error = %d\n"),
                        error);
        }
        libxfs_irele(ip);
}
 522
 523 static int
 524 fill_rbmino(xfs_mount_t *mp)
 525 {
 526         struct xfs_buf  *bp;
 527         xfs_trans_t     *tp;
 528         xfs_inode_t     *ip;
 529         xfs_rtword_t    *bmp;
 530         int             nmap;
 531         int             error;
 532         xfs_fileoff_t   bno;
 533         xfs_bmbt_irec_t map;
 534
 535         bmp = btmcompute;
 536         bno = 0;
 537
 538         error = -libxfs_trans_alloc_rollable(mp, 10, &tp);
 539         if (error)
 540                 res_failed(error);
 541
 542         error = -libxfs_iget(mp, tp, mp->m_sb.sb_rbmino, 0, &ip);
 543         if (error) {
 544                 do_error(
 545                 _("couldn't iget realtime bitmap inode -- error - %d\n"),
 546                         error);
 547         }
 548
 549         while (bno < mp->m_sb.sb_rbmblocks)  {
 550                 /*
 551                  * fill the file one block at a time
 552                  */
 553                 nmap = 1;
 554                 error = -libxfs_bmapi_write(tp, ip, bno, 1, 0, 1, &map, &nmap);
 555                 if (error || nmap != 1) {
 556                         do_error(
 557         _("couldn't map realtime bitmap block %" PRIu64 ", error = %d\n"),
 558                                 bno, error);
 559                 }
 560
 561                 ASSERT(map.br_startblock != HOLESTARTBLOCK);
 562
 563                 error = -libxfs_trans_read_buf(
 564                                 mp, tp, mp->m_dev,
 565                                 XFS_FSB_TO_DADDR(mp, map.br_startblock),
 566                                 XFS_FSB_TO_BB(mp, 1), 1, &bp, NULL);
 567
 568                 if (error) {
 569                         do_warn(
 570 _("can't access block %" PRIu64 " (fsbno %" PRIu64 ") of realtime bitmap inode %" PRIu64 "\n"),
 571                                 bno, map.br_startblock, mp->m_sb.sb_rbmino);
 572                         return(1);
 573                 }
 574
 575                 memmove(bp->b_addr, bmp, mp->m_sb.sb_blocksize);
 576
 577                 libxfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
 578
 579                 bmp = (xfs_rtword_t *)((intptr_t) bmp + mp->m_sb.sb_blocksize);
 580                 bno++;
 581         }
 582
 583         libxfs_trans_ijoin(tp, ip, 0);
 584         error = -libxfs_trans_commit(tp);
 585         if (error)
 586                 do_error(_("%s: commit failed, error %d\n"), __func__, error);
 587         libxfs_irele(ip);
 588         return(0);
 589 }
 590
 591 static int
 592 fill_rsumino(xfs_mount_t *mp)
 593 {
 594         struct xfs_buf  *bp;
 595         xfs_trans_t     *tp;
 596         xfs_inode_t     *ip;
 597         xfs_suminfo_t   *smp;
 598         int             nmap;
 599         int             error;
 600         xfs_fileoff_t   bno;
 601         xfs_fileoff_t   end_bno;
 602         xfs_bmbt_irec_t map;
 603
 604         smp = sumcompute;
 605         bno = 0;
 606         end_bno = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
 607
 608         error = -libxfs_trans_alloc_rollable(mp, 10, &tp);
 609         if (error)
 610                 res_failed(error);
 611
 612         error = -libxfs_iget(mp, tp, mp->m_sb.sb_rsumino, 0, &ip);
 613         if (error) {
 614                 do_error(
 615                 _("couldn't iget realtime summary inode -- error - %d\n"),
 616                         error);
 617         }
 618
 619         while (bno < end_bno)  {
 620                 /*
 621                  * fill the file one block at a time
 622                  */
 623                 nmap = 1;
 624                 error = -libxfs_bmapi_write(tp, ip, bno, 1, 0, 1, &map, &nmap);
 625                 if (error || nmap != 1) {
 626                         do_error(
 627         _("couldn't map realtime summary inode block %" PRIu64 ", error = %d\n"),
 628                                 bno, error);
 629                 }
 630
 631                 ASSERT(map.br_startblock != HOLESTARTBLOCK);
 632
 633                 error = -libxfs_trans_read_buf(
 634                                 mp, tp, mp->m_dev,
 635                                 XFS_FSB_TO_DADDR(mp, map.br_startblock),
 636                                 XFS_FSB_TO_BB(mp, 1), 1, &bp, NULL);
 637
 638                 if (error) {
 639                         do_warn(
 640 _("can't access block %" PRIu64 " (fsbno %" PRIu64 ") of realtime summary inode %" PRIu64 "\n"),
 641                                 bno, map.br_startblock, mp->m_sb.sb_rsumino);
 642                         libxfs_irele(ip);
 643                         return(1);
 644                 }
 645
 646                 memmove(bp->b_addr, smp, mp->m_sb.sb_blocksize);
 647
 648                 libxfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
 649
 650                 smp = (xfs_suminfo_t *)((intptr_t)smp + mp->m_sb.sb_blocksize);
 651                 bno++;
 652         }
 653
 654         libxfs_trans_ijoin(tp, ip, 0);
 655         error = -libxfs_trans_commit(tp);
 656         if (error)
 657                 do_error(_("%s: commit failed, error %d\n"), __func__, error);
 658         libxfs_irele(ip);
 659         return(0);
 660 }
 661
/*
 * Re-create the realtime summary inode (sb_rsumino).
 *
 * The inode core is zeroed and set up as a regular file in extents format
 * with a link count of 1 and a size of m_rsumsize bytes.  Once that is
 * committed, a second transaction maps blocks for the whole file and every
 * mapped extent is zeroed on the data device.  Failures are reported through
 * res_failed() or do_error().
 */
static void
mk_rsumino(xfs_mount_t *mp)
{
        xfs_trans_t     *tp;
        xfs_inode_t     *ip;
        xfs_bmbt_irec_t *ep;
        int             i;
        int             nmap;
        int             error;
        int             nsumblocks;
        xfs_fileoff_t   bno;
        xfs_bmbt_irec_t map[XFS_BMAP_MAX_NMAP];
        int             times;
        uint            blocks;

        /*
         * first set up inode
         */
        i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 10, 0, 0, &tp);
        if (i)
                res_failed(i);

        error = -libxfs_iget(mp, tp, mp->m_sb.sb_rsumino, 0, &ip);
        if (error) {
                do_error(
                _("couldn't iget realtime summary inode -- error - %d\n"),
                        error);
        }

        /* Start from an all-zero inode core. */
        memset(&ip->i_d, 0, sizeof(ip->i_d));

        VFS_I(ip)->i_mode = S_IFREG;
        ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
        if (ip->i_afp)
                ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;

        set_nlink(VFS_I(ip), 1);        /* account for sb ptr */

        /* v3 inodes additionally get a version and a creation timestamp. */
        times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
        if (xfs_sb_version_has_v3inode(&mp->m_sb)) {
                VFS_I(ip)->i_version = 1;
                ip->i_d.di_flags2 = 0;
                times |= XFS_ICHGTIME_CREATE;
        }
        libxfs_trans_ichgtime(tp, ip, times);

        /*
         * now the ifork
         */
        ip->i_df.if_flags = XFS_IFEXTENTS;
        ip->i_df.if_bytes = 0;
        ip->i_df.if_u1.if_root = NULL;

        ip->i_d.di_size = mp->m_rsumsize;

        /*
         * commit changes
         */
        libxfs_trans_ijoin(tp, ip, 0);
        libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
        error = -libxfs_trans_commit(tp);
        if (error)
                do_error(_("%s: commit failed, error %d\n"), __func__, error);

        /*
         * then allocate blocks for file and fill with zeroes (stolen
         * from mkfs)
         */
        nsumblocks = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
        blocks = nsumblocks + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1;
        error = -libxfs_trans_alloc_rollable(mp, blocks, &tp);
        if (error)
                res_failed(error);

        libxfs_trans_ijoin(tp, ip, 0);
        bno = 0;
        /* Keep mapping until the whole file offset range is covered. */
        while (bno < nsumblocks) {
                nmap = XFS_BMAP_MAX_NMAP;
                error = -libxfs_bmapi_write(tp, ip, bno,
                          (xfs_extlen_t)(nsumblocks - bno),
                          0, nsumblocks, map, &nmap);
                if (error) {
                        do_error(
                _("couldn't allocate realtime summary inode, error = %d\n"),
                                error);
                }
                /* Zero each returned extent and advance past it. */
                for (i = 0, ep = map; i < nmap; i++, ep++) {
                        libxfs_device_zero(mp->m_ddev_targp,
                                      XFS_FSB_TO_DADDR(mp, ep->br_startblock),
                                      XFS_FSB_TO_BB(mp, ep->br_blockcount));
                        bno += ep->br_blockcount;
                }
        }
        error = -libxfs_trans_commit(tp);
        if (error) {
                do_error(
        _("allocation of the realtime summary ino failed, error = %d\n"),
                        error);
        }
        libxfs_irele(ip);
}
 763
/*
 * makes a new root directory.
 *
 * The root inode (sb_rootino) has its core zeroed and is set up as a
 * directory with mode 0755, extents format and a link count of 2.  The
 * directory is initialised with itself as parent, the transaction is
 * committed, and the in-core inode record is marked as a directory.
 * Failures are reported through res_failed() or do_error().
 */
static void
mk_root_dir(xfs_mount_t *mp)
{
        xfs_trans_t     *tp;
        xfs_inode_t     *ip;
        int             i;
        int             error;
        const mode_t    mode = 0755;
        ino_tree_node_t *irec;
        int             times;

        ip = NULL;
        i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 10, 0, 0, &tp);
        if (i)
                res_failed(i);

        error = -libxfs_iget(mp, tp, mp->m_sb.sb_rootino, 0, &ip);
        if (error) {
                do_error(_("could not iget root inode -- error - %d\n"), error);
        }

        /*
         * take care of the core -- initialization from xfs_ialloc()
         */
        memset(&ip->i_d, 0, sizeof(ip->i_d));

        VFS_I(ip)->i_mode = mode|S_IFDIR;
        ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
        if (ip->i_afp)
                ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;

        set_nlink(VFS_I(ip), 2);        /* account for . and .. */

        /* v3 inodes additionally get a version and a creation timestamp. */
        times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
        if (xfs_sb_version_has_v3inode(&mp->m_sb)) {
                VFS_I(ip)->i_version = 1;
                ip->i_d.di_flags2 = 0;
                times |= XFS_ICHGTIME_CREATE;
        }
        libxfs_trans_ichgtime(tp, ip, times);
        libxfs_trans_ijoin(tp, ip, 0);
        libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

        /*
         * now the ifork
         */
        ip->i_df.if_flags = XFS_IFEXTENTS;
        ip->i_df.if_bytes = 0;
        ip->i_df.if_u1.if_root = NULL;

        /*
         * initialize the directory
         *
         * The inode is passed as both the directory and its parent.
         * NOTE(review): any result of libxfs_dir_init() is not checked here.
         */
        libxfs_dir_init(tp, ip, ip);

        error = -libxfs_trans_commit(tp);
        if (error)
                do_error(_("%s: commit failed, error %d\n"), __func__, error);

        libxfs_irele(ip);

        /* Flag the root inode as a directory in its in-core inode record. */
        irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
                                XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
        set_inode_isadir(irec, XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino) -
                                irec->ino_startnum);
}
 833
 834 /*
 835  * orphanage name == lost+found
 836  */
 837 static xfs_ino_t
 838 mk_orphanage(xfs_mount_t *mp)
 839 {
 840         xfs_ino_t       ino;
 841         xfs_trans_t     *tp;
 842         xfs_inode_t     *ip;
 843         xfs_inode_t     *pip;
 844         ino_tree_node_t *irec;
 845         int             ino_offset = 0;
 846         int             i;
 847         int             error;
 848         const int       mode = 0755;
 849         int             nres;
 850         struct xfs_name xname;
 851
 852         /*
 853          * check for an existing lost+found first, if it exists, return
 854          * its inode. Otherwise, we can create it. Bad lost+found inodes
 855          * would have been cleared in phase3 and phase4.
 856          */
 857
 858         i = -libxfs_iget(mp, NULL, mp->m_sb.sb_rootino, 0, &pip);
 859         if (i)
 860                 do_error(_("%d - couldn't iget root inode to obtain %s\n"),
 861                         i, ORPHANAGE);
 862
 863         xname.name = (unsigned char *)ORPHANAGE;
 864         xname.len = strlen(ORPHANAGE);
 865         xname.type = XFS_DIR3_FT_DIR;
 866
 867         if (libxfs_dir_lookup(NULL, pip, &xname, &ino, NULL) == 0)
 868                 return ino;
 869
 870         /*
 871          * could not be found, create it
 872          */
 873         nres = XFS_MKDIR_SPACE_RES(mp, xname.len);
 874         i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir, nres, 0, 0, &tp);
 875         if (i)
 876                 res_failed(i);
 877
 878         /*
 879          * use iget/ijoin instead of trans_iget because the ialloc
 880          * wrapper can commit the transaction and start a new one
 881          */
 882 /*      i = -libxfs_iget(mp, NULL, mp->m_sb.sb_rootino, 0, &pip);
 883         if (i)
 884                 do_error(_("%d - couldn't iget root inode to make %s\n"),
 885                         i, ORPHANAGE);*/
 886
 887         error = -libxfs_dir_ialloc(&tp, pip, mode|S_IFDIR,
 888                                         1, 0, &zerocr, &zerofsx, &ip);
 889         if (error) {
 890                 do_error(_("%s inode allocation failed %d\n"),
 891                         ORPHANAGE, error);
 892         }
 893         inc_nlink(VFS_I(ip));           /* account for . */
 894         ino = ip->i_ino;
 895
 896         irec = find_inode_rec(mp,
 897                         XFS_INO_TO_AGNO(mp, ino),
 898                         XFS_INO_TO_AGINO(mp, ino));
 899
 900         if (irec == NULL) {
 901                 /*
 902                  * This inode is allocated from a newly created inode
 903                  * chunk and therefore did not exist when inode chunks
 904                  * were processed in phase3. Add this group of inodes to
 905                  * the entry avl tree as if they were discovered in phase3.
 906                  */
 907                 irec = set_inode_free_alloc(mp, XFS_INO_TO_AGNO(mp, ino),
 908                                             XFS_INO_TO_AGINO(mp, ino));
 909                 alloc_ex_data(irec);
 910
 911                 for (i = 0; i < XFS_INODES_PER_CHUNK; i++)
 912                         set_inode_free(irec, i);
 913         }
 914
 915         ino_offset = get_inode_offset(mp, ino, irec);
 916
 917         /*
 918          * Mark the inode allocated to lost+found as used in the AVL tree
 919          * so it is not skipped in phase 7
 920          */
 921         set_inode_used(irec, ino_offset);
 922         add_inode_ref(irec, ino_offset);
 923         add_inode_reached(irec, ino_offset);
 924
 925         /*
 926          * now that we know the transaction will stay around,
 927          * add the root inode to it
 928          */
 929         libxfs_trans_ijoin(tp, pip, 0);
 930
 931         /*
 932          * create the actual entry
 933          */
 934         error = -libxfs_dir_createname(tp, pip, &xname, ip->i_ino, nres);
 935         if (error)
 936                 do_error(
 937                 _("can't make %s, createname error %d\n"),
 938                         ORPHANAGE, error);
 939
 940         /*
 941          * bump up the link count in the root directory to account
 942          * for .. in the new directory, and update the irec copy of the
 943          * on-disk nlink so we don't fail the link count check later.
 944          */
 945         inc_nlink(VFS_I(pip));
 946         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
 947                                   XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
 948         add_inode_ref(irec, 0);
 949         set_inode_disk_nlinks(irec, 0, get_inode_disk_nlinks(irec, 0) + 1);
 950
 951         libxfs_trans_log_inode(tp, pip, XFS_ILOG_CORE);
 952         libxfs_dir_init(tp, ip, pip);
 953         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 954         error = -libxfs_trans_commit(tp);
 955         if (error) {
 956                 do_error(_("%s directory creation failed -- bmapf error %d\n"),
 957                         ORPHANAGE, error);
 958         }
 959         libxfs_irele(ip);
 960         libxfs_irele(pip);
 961
 962         return(ino);
 963 }
 964
 965 /*
 966  * move a file to the orphange.
 967  */
 968 static void
 969 mv_orphanage(
 970         xfs_mount_t             *mp,
 971         xfs_ino_t               ino,            /* inode # to be moved */
 972         int                     isa_dir)        /* 1 if inode is a directory */
 973 {
 974         xfs_inode_t             *orphanage_ip;
 975         xfs_ino_t               entry_ino_num;
 976         xfs_inode_t             *ino_p;
 977         xfs_trans_t             *tp;
 978         int                     err;
 979         unsigned char           fname[MAXPATHLEN + 1];
 980         int                     nres;
 981         int                     incr;
 982         ino_tree_node_t         *irec;
 983         int                     ino_offset = 0;
 984         struct xfs_name         xname;
 985
 986         xname.name = fname;
 987         xname.len = snprintf((char *)fname, sizeof(fname), "%llu",
 988                                 (unsigned long long)ino);
 989
 990         err = -libxfs_iget(mp, NULL, orphanage_ino, 0, &orphanage_ip);
 991         if (err)
 992                 do_error(_("%d - couldn't iget orphanage inode\n"), err);
 993         /*
 994          * Make sure the filename is unique in the lost+found
 995          */
 996         incr = 0;
 997         while (libxfs_dir_lookup(NULL, orphanage_ip, &xname, &entry_ino_num,
 998                                                                 NULL) == 0)
 999                 xname.len = snprintf((char *)fname, sizeof(fname), "%llu.%d",
1000                                         (unsigned long long)ino, ++incr);
1001
1002         /* Orphans may not have a proper parent, so use custom ops here */
1003         err = -libxfs_iget(mp, NULL, ino, 0, &ino_p);
1004         if (err)
1005                 do_error(_("%d - couldn't iget disconnected inode\n"), err);
1006
1007         xname.type = libxfs_mode_to_ftype(VFS_I(ino_p)->i_mode);
1008
1009         if (isa_dir)  {
1010                 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, orphanage_ino),
1011                                 XFS_INO_TO_AGINO(mp, orphanage_ino));
1012                 if (irec)
1013                         ino_offset = XFS_INO_TO_AGINO(mp, orphanage_ino) -
1014                                         irec->ino_startnum;
1015                 nres = XFS_DIRENTER_SPACE_RES(mp, fnamelen) +
1016                        XFS_DIRENTER_SPACE_RES(mp, 2);
1017                 err = -libxfs_dir_lookup(NULL, ino_p, &xfs_name_dotdot,
1018                                         &entry_ino_num, NULL);
1019                 if (err) {
1020                         ASSERT(err == ENOENT);
1021
1022                         err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_rename,
1023                                                   nres, 0, 0, &tp);
1024                         if (err)
1025                                 do_error(
1026         _("space reservation failed (%d), filesystem may be out of space\n"),
1027                                         err);
1028
1029                         libxfs_trans_ijoin(tp, orphanage_ip, 0);
1030                         libxfs_trans_ijoin(tp, ino_p, 0);
1031
1032                         err = -libxfs_dir_createname(tp, orphanage_ip, &xname,
1033                                                 ino, nres);
1034                         if (err)
1035                                 do_error(
1036         _("name create failed in %s (%d), filesystem may be out of space\n"),
1037                                         ORPHANAGE, err);
1038
1039                         if (irec)
1040                                 add_inode_ref(irec, ino_offset);
1041                         else
1042                                 inc_nlink(VFS_I(orphanage_ip));
1043                         libxfs_trans_log_inode(tp, orphanage_ip, XFS_ILOG_CORE);
1044
1045                         err = -libxfs_dir_createname(tp, ino_p, &xfs_name_dotdot,
1046                                         orphanage_ino, nres);
1047                         if (err)
1048                                 do_error(
1049         _("creation of .. entry failed (%d), filesystem may be out of space\n"),
1050                                         err);
1051
1052                         inc_nlink(VFS_I(ino_p));
1053                         libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE);
1054                         err = -libxfs_trans_commit(tp);
1055                         if (err)
1056                                 do_error(
1057         _("creation of .. entry failed (%d)\n"), err);
1058                 } else  {
1059                         err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_rename,
1060                                                   nres, 0, 0, &tp);
1061                         if (err)
1062                                 do_error(
1063         _("space reservation failed (%d), filesystem may be out of space\n"),
1064                                         err);
1065
1066                         libxfs_trans_ijoin(tp, orphanage_ip, 0);
1067                         libxfs_trans_ijoin(tp, ino_p, 0);
1068
1069
1070                         err = -libxfs_dir_createname(tp, orphanage_ip, &xname,
1071                                                 ino, nres);
1072                         if (err)
1073                                 do_error(
1074         _("name create failed in %s (%d), filesystem may be out of space\n"),
1075                                         ORPHANAGE, err);
1076
1077                         if (irec)
1078                                 add_inode_ref(irec, ino_offset);
1079                         else
1080                                 inc_nlink(VFS_I(orphanage_ip));
1081                         libxfs_trans_log_inode(tp, orphanage_ip, XFS_ILOG_CORE);
1082
1083                         /*
1084                          * don't replace .. value if it already points
1085                          * to us.  that'll pop a libxfs/kernel ASSERT.
1086                          */
1087                         if (entry_ino_num != orphanage_ino)  {
1088                                 err = -libxfs_dir_replace(tp, ino_p,
1089                                                 &xfs_name_dotdot, orphanage_ino,
1090                                                 nres);
1091                                 if (err)
1092                                         do_error(
1093         _("name replace op failed (%d), filesystem may be out of space\n"),
1094                                                 err);
1095                         }
1096
1097                         err = -libxfs_trans_commit(tp);
1098                         if (err)
1099                                 do_error(
1100         _("orphanage name replace op failed (%d)\n"), err);
1101                 }
1102
1103         } else  {
1104                 /*
1105                  * use the remove log reservation as that's
1106                  * more accurate.  we're only creating the
1107                  * links, we're not doing the inode allocation
1108                  * also accounted for in the create
1109                  */
1110                 nres = XFS_DIRENTER_SPACE_RES(mp, xname.len);
1111                 err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove,
1112                                           nres, 0, 0, &tp);
1113                 if (err)
1114                         do_error(
1115         _("space reservation failed (%d), filesystem may be out of space\n"),
1116                                 err);
1117
1118                 libxfs_trans_ijoin(tp, orphanage_ip, 0);
1119                 libxfs_trans_ijoin(tp, ino_p, 0);
1120
1121                 err = -libxfs_dir_createname(tp, orphanage_ip, &xname, ino,
1122                                                 nres);
1123                 if (err)
1124                         do_error(
1125         _("name create failed in %s (%d), filesystem may be out of space\n"),
1126                                 ORPHANAGE, err);
1127                 ASSERT(err == 0);
1128
1129                 set_nlink(VFS_I(ino_p), 1);
1130                 libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE);
1131                 err = -libxfs_trans_commit(tp);
1132                 if (err)
1133                         do_error(
1134         _("orphanage name create failed (%d)\n"), err);
1135         }
1136         libxfs_irele(ino_p);
1137         libxfs_irele(orphanage_ip);
1138 }
1139
1140 static int
1141 entry_junked(
1142         const char      *msg,
1143         const char      *iname,
1144         xfs_ino_t       ino1,
1145         xfs_ino_t       ino2)
1146 {
1147         do_warn(msg, iname, ino1, ino2);
1148         if (!no_modify) {
1149                 if (verbose)
1150                         do_warn(_(", marking entry to be junked\n"));
1151                 else
1152                         do_warn("\n");
1153         } else
1154                 do_warn(_(", would junk entry\n"));
1155         return !no_modify;
1156 }
1157
1158 /* Find and invalidate all the directory's buffers. */
1159 static int
1160 dir_binval(
1161         struct xfs_trans        *tp,
1162         struct xfs_inode        *ip,
1163         int                     whichfork)
1164 {
1165         struct xfs_iext_cursor  icur;
1166         struct xfs_bmbt_irec    rec;
1167         struct xfs_ifork        *ifp;
1168         struct xfs_da_geometry  *geo;
1169         struct xfs_buf          *bp;
1170         xfs_dablk_t             dabno;
1171         int                     error = 0;
1172
1173         if (ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
1174             ip->i_df.if_format != XFS_DINODE_FMT_BTREE)
1175                 return 0;
1176
1177         geo = tp->t_mountp->m_dir_geo;
1178         ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1179         for_each_xfs_iext(ifp, &icur, &rec) {
1180                 for (dabno = roundup(rec.br_startoff, geo->fsbcount);
1181                      dabno < rec.br_startoff + rec.br_blockcount;
1182                      dabno += geo->fsbcount) {
1183                         bp = NULL;
1184                         error = -libxfs_da_get_buf(tp, ip, dabno, &bp,
1185                                         whichfork);
1186                         if (error)
1187                                 return error;
1188                         if (!bp)
1189                                 continue;
1190                         libxfs_trans_binval(tp, bp);
1191                         libxfs_trans_brelse(tp, bp);
1192                 }
1193         }
1194
1195         return error;
1196 }
1197
1198 /*
1199  * Unexpected failure during the rebuild will leave the entries in
1200  * lost+found on the next run
1201  */
1202
1203 static void
1204 longform_dir2_rebuild(
1205         struct xfs_mount        *mp,
1206         xfs_ino_t               ino,
1207         struct xfs_inode        *ip,
1208         struct ino_tree_node    *irec,
1209         int                     ino_offset,
1210         struct dir_hash_tab     *hashtab)
1211 {
1212         int                     error;
1213         int                     nres;
1214         struct xfs_trans        *tp;
1215         xfs_fileoff_t           lastblock;
1216         struct xfs_inode        pip;
1217         struct dir_hash_ent     *p;
1218         int                     done = 0;
1219
1220         /*
1221          * trash directory completely and rebuild from scratch using the
1222          * name/inode pairs in the hash table
1223          */
1224
1225         do_warn(_("rebuilding directory inode %" PRIu64 "\n"), ino);
1226
1227         /*
1228          * first attempt to locate the parent inode, if it can't be
1229          * found, set it to the root inode and it'll be moved to the
1230          * orphanage later (the inode number here needs to be valid
1231          * for the libxfs_dir_init() call).
1232          */
1233         pip.i_ino = get_inode_parent(irec, ino_offset);
1234         if (pip.i_ino == NULLFSINO ||
1235             libxfs_dir_ino_validate(mp, pip.i_ino))
1236                 pip.i_ino = mp->m_sb.sb_rootino;
1237
1238         nres = XFS_REMOVE_SPACE_RES(mp);
1239         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, nres, 0, 0, &tp);
1240         if (error)
1241                 res_failed(error);
1242         libxfs_trans_ijoin(tp, ip, 0);
1243
1244         error = dir_binval(tp, ip, XFS_DATA_FORK);
1245         if (error)
1246                 do_error(_("error %d invalidating directory %llu blocks\n"),
1247                                 error, (unsigned long long)ip->i_ino);
1248
1249         if ((error = -libxfs_bmap_last_offset(ip, &lastblock, XFS_DATA_FORK)))
1250                 do_error(_("xfs_bmap_last_offset failed -- error - %d\n"),
1251                         error);
1252
1253         /* free all data, leaf, node and freespace blocks */
1254         while (!done) {
1255                error = -libxfs_bunmapi(tp, ip, 0, lastblock, XFS_BMAPI_METADATA,
1256                                        0, &done);
1257                if (error) {
1258                        do_warn(_("xfs_bunmapi failed -- error - %d\n"), error);
1259                        goto out_bmap_cancel;
1260                }
1261                error = -libxfs_defer_finish(&tp);
1262                if (error) {
1263                        do_warn(("defer_finish failed -- error - %d\n"), error);
1264                        goto out_bmap_cancel;
1265                }
1266                /*
1267                 * Close out trans and start the next one in the chain.
1268                 */
1269                error = -libxfs_trans_roll_inode(&tp, ip);
1270                if (error)
1271                         goto out_bmap_cancel;
1272         }
1273
1274         error = -libxfs_dir_init(tp, ip, &pip);
1275         if (error) {
1276                 do_warn(_("xfs_dir_init failed -- error - %d\n"), error);
1277                 goto out_bmap_cancel;
1278         }
1279
1280         error = -libxfs_trans_commit(tp);
1281         if (error)
1282                 do_error(
1283         _("dir init failed (%d)\n"), error);
1284
1285         if (ino == mp->m_sb.sb_rootino)
1286                 need_root_dotdot = 0;
1287
1288         /* go through the hash list and re-add the inodes */
1289
1290         for (p = hashtab->first; p; p = p->nextbyorder) {
1291
1292                 if (p->name.name[0] == '/' || (p->name.name[0] == '.' &&
1293                                 (p->name.len == 1 || (p->name.len == 2 &&
1294                                                 p->name.name[1] == '.'))))
1295                         continue;
1296
1297                 nres = XFS_CREATE_SPACE_RES(mp, p->name.len);
1298                 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_create,
1299                                             nres, 0, 0, &tp);
1300                 if (error)
1301                         res_failed(error);
1302
1303                 libxfs_trans_ijoin(tp, ip, 0);
1304
1305                 error = -libxfs_dir_createname(tp, ip, &p->name, p->inum,
1306                                                 nres);
1307                 if (error) {
1308                         do_warn(
1309 _("name create failed in ino %" PRIu64 " (%d), filesystem may be out of space\n"),
1310                                 ino, error);
1311                         goto out_bmap_cancel;
1312                 }
1313
1314                 error = -libxfs_trans_commit(tp);
1315                 if (error)
1316                         do_error(
1317 _("name create failed (%d) during rebuild\n"), error);
1318         }
1319
1320         return;
1321
1322 out_bmap_cancel:
1323         libxfs_trans_cancel(tp);
1324         return;
1325 }
1326
1327
1328 /*
1329  * Kill a block in a version 2 inode.
1330  * Makes its own transaction.
1331  */
1332 static void
1333 dir2_kill_block(
1334         xfs_mount_t     *mp,
1335         xfs_inode_t     *ip,
1336         xfs_dablk_t     da_bno,
1337         struct xfs_buf  *bp)
1338 {
1339         xfs_da_args_t   args;
1340         int             error;
1341         int             nres;
1342         xfs_trans_t     *tp;
1343
1344         nres = XFS_REMOVE_SPACE_RES(mp);
1345         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, nres, 0, 0, &tp);
1346         if (error)
1347                 res_failed(error);
1348         libxfs_trans_ijoin(tp, ip, 0);
1349         libxfs_trans_bjoin(tp, bp);
1350         libxfs_trans_bhold(tp, bp);
1351         memset(&args, 0, sizeof(args));
1352         args.dp = ip;
1353         args.trans = tp;
1354         args.whichfork = XFS_DATA_FORK;
1355         args.geo = mp->m_dir_geo;
1356         if (da_bno >= mp->m_dir_geo->leafblk && da_bno < mp->m_dir_geo->freeblk)
1357                 error = -libxfs_da_shrink_inode(&args, da_bno, bp);
1358         else
1359                 error = -libxfs_dir2_shrink_inode(&args,
1360                                 xfs_dir2_da_to_db(mp->m_dir_geo, da_bno), bp);
1361         if (error)
1362                 do_error(_("shrink_inode failed inode %" PRIu64 " block %u\n"),
1363                         ip->i_ino, da_bno);
1364         error = -libxfs_trans_commit(tp);
1365         if (error)
1366                 do_error(
1367 _("directory shrink failed (%d)\n"), error);
1368 }
1369
1370 /*
1371  * process a data block, also checks for .. entry
1372  * and corrects it to match what we think .. should be
1373  */
1374 static void
1375 longform_dir2_entry_check_data(
1376         struct xfs_mount        *mp,
1377         struct xfs_inode        *ip,
1378         int                     *num_illegal,
1379         int                     *need_dot,
1380         struct ino_tree_node    *current_irec,
1381         int                     current_ino_offset,
1382         struct xfs_buf          *bp,
1383         struct dir_hash_tab     *hashtab,
1384         freetab_t               **freetabp,
1385         xfs_dablk_t             da_bno,
1386         int                     isblock)
1387 {
1388         xfs_dir2_dataptr_t      addr;
1389         xfs_dir2_leaf_entry_t   *blp;
1390         xfs_dir2_block_tail_t   *btp;
1391         struct xfs_dir2_data_hdr *d;
1392         xfs_dir2_db_t           db;
1393         xfs_dir2_data_entry_t   *dep;
1394         xfs_dir2_data_unused_t  *dup;
1395         struct xfs_dir2_data_free *bf;
1396         char                    *endptr;
1397         int                     error;
1398         char                    fname[MAXNAMELEN + 1];
1399         freetab_t               *freetab;
1400         int                     i;
1401         int                     ino_offset;
1402         xfs_ino_t               inum;
1403         ino_tree_node_t         *irec;
1404         int                     junkit;
1405         int                     lastfree;
1406         int                     len;
1407         int                     nbad;
1408         int                     needlog;
1409         int                     needscan;
1410         xfs_ino_t               parent;
1411         char                    *ptr;
1412         xfs_trans_t             *tp;
1413         int                     wantmagic;
1414         struct xfs_da_args      da = {
1415                 .dp = ip,
1416                 .geo = mp->m_dir_geo,
1417         };
1418
1419
1420         d = bp->b_addr;
1421         ptr = (char *)d + mp->m_dir_geo->data_entry_offset;
1422         nbad = 0;
1423         needscan = needlog = 0;
1424         junkit = 0;
1425         freetab = *freetabp;
1426         if (isblock) {
1427                 btp = xfs_dir2_block_tail_p(mp->m_dir_geo, d);
1428                 blp = xfs_dir2_block_leaf_p(btp);
1429                 endptr = (char *)blp;
1430                 if (endptr > (char *)btp)
1431                         endptr = (char *)btp;
1432                 if (xfs_sb_version_hascrc(&mp->m_sb))
1433                         wantmagic = XFS_DIR3_BLOCK_MAGIC;
1434                 else
1435                         wantmagic = XFS_DIR2_BLOCK_MAGIC;
1436         } else {
1437                 endptr = (char *)d + mp->m_dir_geo->blksize;
1438                 if (xfs_sb_version_hascrc(&mp->m_sb))
1439                         wantmagic = XFS_DIR3_DATA_MAGIC;
1440                 else
1441                         wantmagic = XFS_DIR2_DATA_MAGIC;
1442         }
1443         db = xfs_dir2_da_to_db(mp->m_dir_geo, da_bno);
1444
1445         /* check for data block beyond expected end */
1446         if (freetab->naents <= db) {
1447                 struct freetab_ent e;
1448
1449                 *freetabp = freetab = realloc(freetab, FREETAB_SIZE(db + 1));
1450                 if (!freetab) {
1451                         do_error(_("realloc failed in %s (%zu bytes)\n"),
1452                                 __func__, FREETAB_SIZE(db + 1));
1453                 }
1454                 e.v = NULLDATAOFF;
1455                 e.s = 0;
1456                 for (i = freetab->naents; i < db; i++)
1457                         freetab->ents[i] = e;
1458                 freetab->naents = db + 1;
1459         }
1460
1461         /* check the data block */
1462         while (ptr < endptr) {
1463
1464                 /* check for freespace */
1465                 dup = (xfs_dir2_data_unused_t *)ptr;
1466                 if (XFS_DIR2_DATA_FREE_TAG == be16_to_cpu(dup->freetag)) {
1467
1468                         /* check for invalid freespace length */
1469                         if (ptr + be16_to_cpu(dup->length) > endptr ||
1470                                         be16_to_cpu(dup->length) == 0 ||
1471                                         (be16_to_cpu(dup->length) &
1472                                                 (XFS_DIR2_DATA_ALIGN - 1)))
1473                                 break;
1474
1475                         /* check for invalid tag */
1476                         if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) !=
1477                                                 (char *)dup - (char *)d)
1478                                 break;
1479
1480                         /* check for block with no data entries */
1481                         if ((ptr == (char *)d + mp->m_dir_geo->data_entry_offset) &&
1482                             (ptr + be16_to_cpu(dup->length) >= endptr)) {
1483                                 junkit = 1;
1484                                 *num_illegal += 1;
1485                                 break;
1486                         }
1487
1488                         /* continue at the end of the freespace */
1489                         ptr += be16_to_cpu(dup->length);
1490                         if (ptr >= endptr)
1491                                 break;
1492                 }
1493
1494                 /* validate data entry size */
1495                 dep = (xfs_dir2_data_entry_t *)ptr;
1496                 if (ptr + libxfs_dir2_data_entsize(mp, dep->namelen) > endptr)
1497                         break;
1498                 if (be16_to_cpu(*libxfs_dir2_data_entry_tag_p(mp, dep)) !=
1499                                                 (char *)dep - (char *)d)
1500                         break;
1501                 ptr += libxfs_dir2_data_entsize(mp, dep->namelen);
1502         }
1503
1504         /* did we find an empty or corrupt block? */
1505         if (ptr != endptr) {
1506                 if (junkit) {
1507                         do_warn(
1508         _("empty data block %u in directory inode %" PRIu64 ": "),
1509                                 da_bno, ip->i_ino);
1510                 } else {
1511                         do_warn(_
1512         ("corrupt block %u in directory inode %" PRIu64 ": "),
1513                                 da_bno, ip->i_ino);
1514                 }
1515                 if (!no_modify) {
1516                         do_warn(_("junking block\n"));
1517                         dir2_kill_block(mp, ip, da_bno, bp);
1518                 } else {
1519                         do_warn(_("would junk block\n"));
1520                 }
1521                 freetab->ents[db].v = NULLDATAOFF;
1522                 return;
1523         }
1524
1525         /* update number of data blocks processed */
1526         if (freetab->nents < db + 1)
1527                 freetab->nents = db + 1;
1528
1529         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, 0, 0, 0, &tp);
1530         if (error)
1531                 res_failed(error);
1532         da.trans = tp;
1533         libxfs_trans_ijoin(tp, ip, 0);
1534         libxfs_trans_bjoin(tp, bp);
1535         libxfs_trans_bhold(tp, bp);
1536         if (be32_to_cpu(d->magic) != wantmagic) {
1537                 do_warn(
1538         _("bad directory block magic # %#x for directory inode %" PRIu64 " block %d: "),
1539                         be32_to_cpu(d->magic), ip->i_ino, da_bno);
1540                 if (!no_modify) {
1541                         do_warn(_("fixing magic # to %#x\n"), wantmagic);
1542                         d->magic = cpu_to_be32(wantmagic);
1543                         needlog = 1;
1544                 } else
1545                         do_warn(_("would fix magic # to %#x\n"), wantmagic);
1546         }
1547         lastfree = 0;
1548         ptr = (char *)d + mp->m_dir_geo->data_entry_offset;
1549         /*
1550          * look at each entry.  reference inode pointed to by each
1551          * entry in the incore inode tree.
1552          * if not a directory, set reached flag, increment link count
1553          * if a directory and reached, mark entry as to be deleted.
1554          * if a directory, check to see if recorded parent
1555          *      matches current inode #,
1556          *      if so, then set reached flag, increment link count
1557          *              of current and child dir inodes, push the child
1558          *              directory inode onto the directory stack.
1559          *      if current inode != parent, then mark entry to be deleted.
1560          */
1561         while (ptr < endptr) {
1562                 dup = (xfs_dir2_data_unused_t *)ptr;
1563                 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
1564                         if (lastfree) {
1565                                 do_warn(
1566         _("directory inode %" PRIu64 " block %u has consecutive free entries: "),
1567                                         ip->i_ino, da_bno);
1568                                 if (!no_modify) {
1569
1570                                         do_warn(_("joining together\n"));
1571                                         len = be16_to_cpu(dup->length);
1572                                         libxfs_dir2_data_use_free(&da, bp, dup,
1573                                                 ptr - (char *)d, len, &needlog,
1574                                                 &needscan);
1575                                         libxfs_dir2_data_make_free(&da, bp,
1576                                                 ptr - (char *)d, len, &needlog,
1577                                                 &needscan);
1578                                 } else
1579                                         do_warn(_("would join together\n"));
1580                         }
1581                         ptr += be16_to_cpu(dup->length);
1582                         lastfree = 1;
1583                         continue;
1584                 }
1585                 addr = xfs_dir2_db_off_to_dataptr(mp->m_dir_geo, db,
1586                                                   ptr - (char *)d);
1587                 dep = (xfs_dir2_data_entry_t *)ptr;
1588                 ptr += libxfs_dir2_data_entsize(mp, dep->namelen);
1589                 inum = be64_to_cpu(dep->inumber);
1590                 lastfree = 0;
1591                 /*
1592                  * skip bogus entries (leading '/').  they'll be deleted
1593                  * later.  must still log it, else we leak references to
1594                  * buffers.
1595                  */
1596                 if (dep->name[0] == '/')  {
1597                         nbad++;
1598                         if (!no_modify)
1599                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1600                         continue;
1601                 }
1602
1603                 memmove(fname, dep->name, dep->namelen);
1604                 fname[dep->namelen] = '\0';
1605                 ASSERT(inum != NULLFSINO);
1606
1607                 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, inum),
1608                                         XFS_INO_TO_AGINO(mp, inum));
1609                 if (irec == NULL)  {
1610                         nbad++;
1611                         if (entry_junked(
1612         _("entry \"%s\" in directory inode %" PRIu64 " points to non-existent inode %" PRIu64 ""),
1613                                         fname, ip->i_ino, inum)) {
1614                                 dep->name[0] = '/';
1615                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1616                         }
1617                         continue;
1618                 }
1619                 ino_offset = XFS_INO_TO_AGINO(mp, inum) - irec->ino_startnum;
1620
1621                 /*
1622                  * if it's a free inode, blow out the entry.
1623                  * by now, any inode that we think is free
1624                  * really is free.
1625                  */
1626                 if (is_inode_free(irec, ino_offset))  {
1627                         nbad++;
1628                         if (entry_junked(
1629         _("entry \"%s\" in directory inode %" PRIu64 " points to free inode %" PRIu64),
1630                                         fname, ip->i_ino, inum)) {
1631                                 dep->name[0] = '/';
1632                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1633                         }
1634                         continue;
1635                 }
1636
1637                 /*
1638                  * check if this inode is lost+found dir in the root
1639                  */
1640                 if (inum == mp->m_sb.sb_rootino && strcmp(fname, ORPHANAGE) == 0) {
1641                         /*
1642                          * if it's not a directory, trash it
1643                          */
1644                         if (!inode_isadir(irec, ino_offset)) {
1645                                 nbad++;
1646                                 if (entry_junked(
1647         _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"),
1648                                                 ORPHANAGE, inum, ip->i_ino)) {
1649                                         dep->name[0] = '/';
1650                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1651                                 }
1652                                 continue;
1653                         }
1654                         /*
1655                          * if this is a dup, it will be picked up below,
1656                          * otherwise, mark it as the orphanage for later.
1657                          */
1658                         if (!orphanage_ino)
1659                                 orphanage_ino = inum;
1660                 }
1661
1662                 /*
1663                  * check for duplicate names in directory.
1664                  */
1665                 if (!dir_hash_add(mp, hashtab, addr, inum, dep->namelen,
1666                                 dep->name, libxfs_dir2_data_get_ftype(mp, dep))) {
1667                         nbad++;
1668                         if (entry_junked(
1669         _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"),
1670                                         fname, inum, ip->i_ino)) {
1671                                 dep->name[0] = '/';
1672                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1673                         }
1674                         if (inum == orphanage_ino)
1675                                 orphanage_ino = 0;
1676                         continue;
1677                 }
1678
1679                 /*
1680                  * if just scanning to rebuild a directory due to a ".."
1681                  * update, just continue
1682                  */
1683                 if (dotdot_update)
1684                         continue;
1685
1686                 /*
1687                  * skip the '..' entry since it's checked when the
1688                  * directory is reached by something else.  if it never
1689                  * gets reached, it'll be moved to the orphanage and we'll
1690                  * take care of it then. If it doesn't exist at all, the
1691                  * directory needs to be rebuilt first before being added
1692                  * to the orphanage.
1693                  */
1694                 if (dep->namelen == 2 && dep->name[0] == '.' &&
1695                                 dep->name[1] == '.') {
1696                         if (da_bno != 0) {
1697                                 /* ".." should be in the first block */
1698                                 nbad++;
1699                                 if (entry_junked(
1700         _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is not in the the first block"), fname,
1701                                                 inum, ip->i_ino)) {
1702                                         dep->name[0] = '/';
1703                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1704                                 }
1705                         }
1706                         continue;
1707                 }
1708                 ASSERT(no_modify || libxfs_verify_dir_ino(mp, inum));
1709                 /*
1710                  * special case the . entry.  we know there's only one
1711                  * '.' and only '.' points to itself because bogus entries
1712                  * got trashed in phase 3 if there were > 1.
1713                  * bump up link count for '.' but don't set reached
1714                  * until we're actually reached by another directory
1715                  * '..' is already accounted for or will be taken care
1716                  * of when directory is moved to orphanage.
1717                  */
1718                 if (ip->i_ino == inum)  {
1719                         ASSERT(no_modify ||
1720                                (dep->name[0] == '.' && dep->namelen == 1));
1721                         add_inode_ref(current_irec, current_ino_offset);
1722                         if (da_bno != 0 ||
1723                             dep != (void *)d + mp->m_dir_geo->data_entry_offset) {
1724                                 /* "." should be the first entry */
1725                                 nbad++;
1726                                 if (entry_junked(
1727         _("entry \"%s\" in dir %" PRIu64 " is not the first entry"),
1728                                                 fname, inum, ip->i_ino)) {
1729                                         dep->name[0] = '/';
1730                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1731                                 }
1732                         }
1733                         *need_dot = 0;
1734                         continue;
1735                 }
1736                 /*
1737                  * skip entries with bogus inumbers if we're in no modify mode
1738                  */
1739                 if (no_modify && !libxfs_verify_dir_ino(mp, inum))
1740                         continue;
1741
1742                 /* validate ftype field if supported */
1743                 if (xfs_sb_version_hasftype(&mp->m_sb)) {
1744                         uint8_t dir_ftype;
1745                         uint8_t ino_ftype;
1746
1747                         dir_ftype = libxfs_dir2_data_get_ftype(mp, dep);
1748                         ino_ftype = get_inode_ftype(irec, ino_offset);
1749
1750                         if (dir_ftype != ino_ftype) {
1751                                 if (no_modify) {
1752                                         do_warn(
1753         _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
1754                                                 dir_ftype, ino_ftype,
1755                                                 ip->i_ino, inum);
1756                                 } else {
1757                                         do_warn(
1758         _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
1759                                                 dir_ftype, ino_ftype,
1760                                                 ip->i_ino, inum);
1761                                         libxfs_dir2_data_put_ftype(mp, dep, ino_ftype);
1762                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1763                                         dir_hash_update_ftype(hashtab, addr,
1764                                                               ino_ftype);
1765                                 }
1766                         }
1767                 }
1768
1769                 /*
1770                  * check easy case first, regular inode, just bump
1771                  * the link count and continue
1772                  */
1773                 if (!inode_isadir(irec, ino_offset))  {
1774                         add_inode_reached(irec, ino_offset);
1775                         continue;
1776                 }
1777                 parent = get_inode_parent(irec, ino_offset);
1778                 ASSERT(parent != 0);
1779                 junkit = 0;
1780                 /*
1781                  * bump up the link counts in parent and child
1782                  * directory but if the link doesn't agree with
1783                  * the .. in the child, blow out the entry.
1784                  * if the directory has already been reached,
1785                  * blow away the entry also.
1786                  */
1787                 if (is_inode_reached(irec, ino_offset))  {
1788                         junkit = 1;
1789                         do_warn(
1790 _("entry \"%s\" in dir %" PRIu64" points to an already connected directory inode %" PRIu64 "\n"),
1791                                 fname, ip->i_ino, inum);
1792                 } else if (parent == ip->i_ino)  {
1793                         add_inode_reached(irec, ino_offset);
1794                         add_inode_ref(current_irec, current_ino_offset);
1795                 } else if (parent == NULLFSINO) {
1796                         /* ".." was missing, but this entry refers to it,
1797                            so, set it as the parent and mark for rebuild */
1798                         do_warn(
1799         _("entry \"%s\" in dir ino %" PRIu64 " doesn't have a .. entry, will set it in ino %" PRIu64 ".\n"),
1800                                 fname, ip->i_ino, inum);
1801                         set_inode_parent(irec, ino_offset, ip->i_ino);
1802                         add_inode_reached(irec, ino_offset);
1803                         add_inode_ref(current_irec, current_ino_offset);
1804                         add_dotdot_update(XFS_INO_TO_AGNO(mp, inum), irec,
1805                                                                 ino_offset);
1806                 } else  {
1807                         junkit = 1;
1808                         do_warn(
1809 _("entry \"%s\" in dir inode %" PRIu64 " inconsistent with .. value (%" PRIu64 ") in ino %" PRIu64 "\n"),
1810                                 fname, ip->i_ino, parent, inum);
1811                 }
1812                 if (junkit)  {
1813                         if (inum == orphanage_ino)
1814                                 orphanage_ino = 0;
1815                         nbad++;
1816                         if (!no_modify)  {
1817                                 dep->name[0] = '/';
1818                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1819                                 if (verbose)
1820                                         do_warn(
1821                                         _("\twill clear entry \"%s\"\n"),
1822                                                 fname);
1823                         } else  {
1824                                 do_warn(_("\twould clear entry \"%s\"\n"),
1825                                         fname);
1826                         }
1827                 }
1828         }
1829         *num_illegal += nbad;
1830         if (needscan)
1831                 libxfs_dir2_data_freescan(mp, d, &i);
1832         if (needlog)
1833                 libxfs_dir2_data_log_header(&da, bp);
1834         error = -libxfs_trans_commit(tp);
1835         if (error)
1836                 do_error(
1837 _("directory block fixing failed (%d)\n"), error);
1838
1839         /* record the largest free space in the freetab for later checking */
1840         bf = libxfs_dir2_data_bestfree_p(mp, d);
1841         freetab->ents[db].v = be16_to_cpu(bf[0].length);
1842         freetab->ents[db].s = 0;
1843 }
1844
1845 /* check v5 metadata */
1846 static int
1847 __check_dir3_header(
1848         struct xfs_mount        *mp,
1849         struct xfs_buf          *bp,
1850         xfs_ino_t               ino,
1851         __be64                  owner,
1852         __be64                  blkno,
1853         uuid_t                  *uuid)
1854 {
1855
1856         /* verify owner */
1857         if (be64_to_cpu(owner) != ino) {
1858                 do_warn(
1859 _("expected owner inode %" PRIu64 ", got %llu, directory block %" PRIu64 "\n"),
1860                         ino, (unsigned long long)be64_to_cpu(owner), bp->b_bn);
1861                 return 1;
1862         }
1863         /* verify block number */
1864         if (be64_to_cpu(blkno) != bp->b_bn) {
1865                 do_warn(
1866 _("expected block %" PRIu64 ", got %llu, directory inode %" PRIu64 "\n"),
1867                         bp->b_bn, (unsigned long long)be64_to_cpu(blkno), ino);
1868                 return 1;
1869         }
1870         /* verify uuid */
1871         if (platform_uuid_compare(uuid, &mp->m_sb.sb_meta_uuid) != 0) {
1872                 do_warn(
1873 _("wrong FS UUID, directory inode %" PRIu64 " block %" PRIu64 "\n"),
1874                         ino, bp->b_bn);
1875                 return 1;
1876         }
1877
1878         return 0;
1879 }
1880
1881 static int
1882 check_da3_header(
1883         struct xfs_mount        *mp,
1884         struct xfs_buf          *bp,
1885         xfs_ino_t               ino)
1886 {
1887         struct xfs_da3_blkinfo  *info = bp->b_addr;
1888
1889         return __check_dir3_header(mp, bp, ino, info->owner, info->blkno,
1890                                    &info->uuid);
1891 }
1892
1893 static int
1894 check_dir3_header(
1895         struct xfs_mount        *mp,
1896         struct xfs_buf          *bp,
1897         xfs_ino_t               ino)
1898 {
1899         struct xfs_dir3_blk_hdr *info = bp->b_addr;
1900
1901         return __check_dir3_header(mp, bp, ino, info->owner, info->blkno,
1902                                    &info->uuid);
1903 }
1904
1905 /*
1906  * Check contents of leaf-form block.
1907  */
1908 static int
1909 longform_dir2_check_leaf(
1910         struct xfs_mount        *mp,
1911         struct xfs_inode        *ip,
1912         struct dir_hash_tab     *hashtab,
1913         struct freetab          *freetab)
1914 {
1915         int                     badtail;
1916         __be16                  *bestsp;
1917         struct xfs_buf          *bp;
1918         xfs_dablk_t             da_bno;
1919         int                     i;
1920         xfs_dir2_leaf_t         *leaf;
1921         xfs_dir2_leaf_tail_t    *ltp;
1922         int                     seeval;
1923         struct xfs_dir2_leaf_entry *ents;
1924         struct xfs_dir3_icleaf_hdr leafhdr;
1925         int                     error;
1926         int                     fixit = 0;
1927
1928         da_bno = mp->m_dir_geo->leafblk;
1929         error = dir_read_buf(ip, da_bno, &bp, &xfs_dir3_leaf1_buf_ops, &fixit);
1930         if (error == EFSBADCRC || error == EFSCORRUPTED || fixit) {
1931                 do_warn(
1932         _("leaf block %u for directory inode %" PRIu64 " bad CRC\n"),
1933                         da_bno, ip->i_ino);
1934                 return 1;
1935         } else if (error) {
1936                 do_error(
1937         _("can't read block %u for directory inode %" PRIu64 ", error %d\n"),
1938                         da_bno, ip->i_ino, error);
1939                 /* NOTREACHED */
1940         }
1941
1942         leaf = bp->b_addr;
1943         libxfs_dir2_leaf_hdr_from_disk(mp, &leafhdr, leaf);
1944         ents = leafhdr.ents;
1945         ltp = xfs_dir2_leaf_tail_p(mp->m_dir_geo, leaf);
1946         bestsp = xfs_dir2_leaf_bests_p(ltp);
1947         if (!(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC ||
1948               leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) ||
1949                                 leafhdr.forw || leafhdr.back ||
1950                                 leafhdr.count < leafhdr.stale ||
1951                                 leafhdr.count > mp->m_dir_geo->leaf_max_ents ||
1952                                 (char *)&ents[leafhdr.count] > (char *)bestsp) {
1953                 do_warn(
1954         _("leaf block %u for directory inode %" PRIu64 " bad header\n"),
1955                         da_bno, ip->i_ino);
1956                 libxfs_buf_relse(bp);
1957                 return 1;
1958         }
1959
1960         if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) {
1961                 error = check_da3_header(mp, bp, ip->i_ino);
1962                 if (error) {
1963                         libxfs_buf_relse(bp);
1964                         return error;
1965                 }
1966         }
1967
1968         seeval = dir_hash_see_all(hashtab, ents, leafhdr.count, leafhdr.stale);
1969         if (dir_hash_check(hashtab, ip, seeval)) {
1970                 libxfs_buf_relse(bp);
1971                 return 1;
1972         }
1973         badtail = freetab->nents != be32_to_cpu(ltp->bestcount);
1974         for (i = 0; !badtail && i < be32_to_cpu(ltp->bestcount); i++) {
1975                 freetab->ents[i].s = 1;
1976                 badtail = freetab->ents[i].v != be16_to_cpu(bestsp[i]);
1977         }
1978         if (badtail) {
1979                 do_warn(
1980         _("leaf block %u for directory inode %" PRIu64 " bad tail\n"),
1981                         da_bno, ip->i_ino);
1982                 libxfs_buf_relse(bp);
1983                 return 1;
1984         }
1985         libxfs_buf_relse(bp);
1986         return fixit;
1987 }
1988
1989 /*
1990  * Check contents of the node blocks (leaves)
1991  * Looks for matching hash values for the data entries.
1992  */
1993 static int
1994 longform_dir2_check_node(
1995         struct xfs_mount        *mp,
1996         struct xfs_inode        *ip,
1997         struct dir_hash_tab     *hashtab,
1998         struct freetab          *freetab)
1999 {
2000         struct xfs_buf          *bp;
2001         xfs_dablk_t             da_bno;
2002         xfs_dir2_db_t           fdb;
2003         xfs_dir2_free_t         *free;
2004         int                     i;
2005         xfs_dir2_leaf_t         *leaf;
2006         xfs_fileoff_t           next_da_bno;
2007         int                     seeval = 0;
2008         int                     used;
2009         struct xfs_dir2_leaf_entry *ents;
2010         struct xfs_dir3_icleaf_hdr leafhdr;
2011         struct xfs_dir3_icfree_hdr freehdr;
2012         __be16                  *bests;
2013         int                     error;
2014         int                     fixit = 0;
2015
2016         for (da_bno = mp->m_dir_geo->leafblk, next_da_bno = 0;
2017                         next_da_bno != NULLFILEOFF && da_bno < mp->m_dir_geo->freeblk;
2018                         da_bno = (xfs_dablk_t)next_da_bno) {
2019                 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2020                 if (bmap_next_offset(ip, &next_da_bno))
2021                         break;
2022
2023                 /*
2024                  * we need to use the da3 node verifier here as it handles the
2025                  * fact that reading the leaf hash tree blocks can return either
2026                  * leaf or node blocks and calls the correct verifier. If we get
2027                  * a node block, then we'll skip it below based on a magic
2028                  * number check.
2029                  */
2030                 error = dir_read_buf(ip, da_bno, &bp, &xfs_da3_node_buf_ops,
2031                                 &fixit);
2032                 if (error) {
2033                         do_warn(
2034         _("can't read leaf block %u for directory inode %" PRIu64 ", error %d\n"),
2035                                 da_bno, ip->i_ino, error);
2036                         return 1;
2037                 }
2038                 leaf = bp->b_addr;
2039                 libxfs_dir2_leaf_hdr_from_disk(mp, &leafhdr, leaf);
2040                 ents = leafhdr.ents;
2041                 if (!(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
2042                       leafhdr.magic == XFS_DIR3_LEAFN_MAGIC ||
2043                       leafhdr.magic == XFS_DA_NODE_MAGIC ||
2044                       leafhdr.magic == XFS_DA3_NODE_MAGIC)) {
2045                         do_warn(
2046         _("unknown magic number %#x for block %u in directory inode %" PRIu64 "\n"),
2047                                 leafhdr.magic, da_bno, ip->i_ino);
2048                         libxfs_buf_relse(bp);
2049                         return 1;
2050                 }
2051
2052                 /* check v5 metadata */
2053                 if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC ||
2054                     leafhdr.magic == XFS_DA3_NODE_MAGIC) {
2055                         error = check_da3_header(mp, bp, ip->i_ino);
2056                         if (error) {
2057                                 libxfs_buf_relse(bp);
2058                                 return error;
2059                         }
2060                 }
2061
2062                 /* ignore nodes */
2063                 if (leafhdr.magic == XFS_DA_NODE_MAGIC ||
2064                     leafhdr.magic == XFS_DA3_NODE_MAGIC) {
2065                         libxfs_buf_relse(bp);
2066                         continue;
2067                 }
2068
2069                 /*
2070                  * If there's a validator error, we need to ensure that we got
2071                  * the right ops on the buffer for when we write it back out.
2072                  */
2073                 bp->b_ops = &xfs_dir3_leafn_buf_ops;
2074                 if (leafhdr.count > mp->m_dir_geo->leaf_max_ents ||
2075                     leafhdr.count < leafhdr.stale) {
2076                         do_warn(
2077         _("leaf block %u for directory inode %" PRIu64 " bad header\n"),
2078                                 da_bno, ip->i_ino);
2079                         libxfs_buf_relse(bp);
2080                         return 1;
2081                 }
2082                 seeval = dir_hash_see_all(hashtab, ents,
2083                                         leafhdr.count, leafhdr.stale);
2084                 libxfs_buf_relse(bp);
2085                 if (seeval != DIR_HASH_CK_OK)
2086                         return 1;
2087         }
2088         if (dir_hash_check(hashtab, ip, seeval))
2089                 return 1;
2090
2091         for (da_bno = mp->m_dir_geo->freeblk, next_da_bno = 0;
2092              next_da_bno != NULLFILEOFF;
2093              da_bno = (xfs_dablk_t)next_da_bno) {
2094                 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2095                 if (bmap_next_offset(ip, &next_da_bno))
2096                         break;
2097
2098                 error = dir_read_buf(ip, da_bno, &bp, &xfs_dir3_free_buf_ops,
2099                                 &fixit);
2100                 if (error) {
2101                         do_warn(
2102         _("can't read freespace block %u for directory inode %" PRIu64 ", error %d\n"),
2103                                 da_bno, ip->i_ino, error);
2104                         return 1;
2105                 }
2106                 free = bp->b_addr;
2107                 libxfs_dir2_free_hdr_from_disk(mp, &freehdr, free);
2108                 bests = freehdr.bests;
2109                 fdb = xfs_dir2_da_to_db(mp->m_dir_geo, da_bno);
2110                 if (!(freehdr.magic == XFS_DIR2_FREE_MAGIC ||
2111                       freehdr.magic == XFS_DIR3_FREE_MAGIC) ||
2112                     freehdr.firstdb !=
2113                         (fdb - xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) *
2114                         mp->m_dir_geo->free_max_bests ||
2115                     freehdr.nvalid < freehdr.nused) {
2116                         do_warn(
2117         _("free block %u for directory inode %" PRIu64 " bad header\n"),
2118                                 da_bno, ip->i_ino);
2119                         libxfs_buf_relse(bp);
2120                         return 1;
2121                 }
2122
2123                 if (freehdr.magic == XFS_DIR3_FREE_MAGIC) {
2124                         error = check_dir3_header(mp, bp, ip->i_ino);
2125                         if (error) {
2126                                 libxfs_buf_relse(bp);
2127                                 return error;
2128                         }
2129                 }
2130                 for (i = used = 0; i < freehdr.nvalid; i++) {
2131                         if (i + freehdr.firstdb >= freetab->nents ||
2132                                         freetab->ents[i + freehdr.firstdb].v !=
2133                                                 be16_to_cpu(bests[i])) {
2134                                 do_warn(
2135         _("free block %u entry %i for directory ino %" PRIu64 " bad\n"),
2136                                         da_bno, i, ip->i_ino);
2137                                 libxfs_buf_relse(bp);
2138                                 return 1;
2139                         }
2140                         used += be16_to_cpu(bests[i]) != NULLDATAOFF;
2141                         freetab->ents[i + freehdr.firstdb].s = 1;
2142                 }
2143                 if (used != freehdr.nused) {
2144                         do_warn(
2145         _("free block %u for directory inode %" PRIu64 " bad nused\n"),
2146                                 da_bno, ip->i_ino);
2147                         libxfs_buf_relse(bp);
2148                         return 1;
2149                 }
2150                 libxfs_buf_relse(bp);
2151         }
2152         for (i = 0; i < freetab->nents; i++) {
2153                 if ((freetab->ents[i].s == 0) &&
2154                     (freetab->ents[i].v != NULLDATAOFF)) {
2155                         do_warn(
2156         _("missing freetab entry %u for directory inode %" PRIu64 "\n"),
2157                                 i, ip->i_ino);
2158                         return 1;
2159                 }
2160         }
2161         return fixit;
2162 }
2163
2164 /*
2165  * If a directory is corrupt, we need to read in as many entries as possible,
2166  * destroy the entry and create a new one with recovered name/inode pairs.
2167  * (ie. get libxfs to do all the grunt work)
2168  */
2169 static void
2170 longform_dir2_entry_check(
2171         struct xfs_mount        *mp,
2172         xfs_ino_t               ino,
2173         struct xfs_inode        *ip,
2174         int                     *num_illegal,
2175         int                     *need_dot,
2176         struct ino_tree_node    *irec,
2177         int                     ino_offset,
2178         struct dir_hash_tab     *hashtab)
2179 {
2180         struct xfs_buf          *bp;
2181         xfs_dablk_t             da_bno;
2182         freetab_t               *freetab;
2183         int                     i;
2184         int                     isblock;
2185         int                     isleaf;
2186         xfs_fileoff_t           next_da_bno;
2187         int                     seeval;
2188         int                     fixit = 0;
2189         struct xfs_da_args      args;
2190
2191         *need_dot = 1;
2192         freetab = malloc(FREETAB_SIZE(ip->i_d.di_size / mp->m_dir_geo->blksize));
2193         if (!freetab) {
2194                 do_error(_("malloc failed in %s (%" PRId64 " bytes)\n"),
2195                         __func__,
2196                         FREETAB_SIZE(ip->i_d.di_size / mp->m_dir_geo->blksize));
2197                 exit(1);
2198         }
2199         freetab->naents = ip->i_d.di_size / mp->m_dir_geo->blksize;
2200         freetab->nents = 0;
2201         for (i = 0; i < freetab->naents; i++) {
2202                 freetab->ents[i].v = NULLDATAOFF;
2203                 freetab->ents[i].s = 0;
2204         }
2205
2206         /* is this a block, leaf, or node directory? */
2207         args.dp = ip;
2208         args.geo = mp->m_dir_geo;
2209         libxfs_dir2_isblock(&args, &isblock);
2210         libxfs_dir2_isleaf(&args, &isleaf);
2211
2212         /* check directory "data" blocks (ie. name/inode pairs) */
2213         for (da_bno = 0, next_da_bno = 0;
2214              next_da_bno != NULLFILEOFF && da_bno < mp->m_dir_geo->leafblk;
2215              da_bno = (xfs_dablk_t)next_da_bno) {
2216                 const struct xfs_buf_ops *ops;
2217                 int                      error;
2218                 struct xfs_dir2_data_hdr *d;
2219
2220                 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2221                 if (bmap_next_offset(ip, &next_da_bno)) {
2222                         /*
2223                          * if this is the first block, there isn't anything we
2224                          * can recover so we just trash it.
2225                          */
2226                          if (da_bno == 0) {
2227                                 fixit++;
2228                                 goto out_fix;
2229                         }
2230                         break;
2231                 }
2232
2233                 if (isblock)
2234                         ops = &xfs_dir3_block_buf_ops;
2235                 else
2236                         ops = &xfs_dir3_data_buf_ops;
2237
2238                 error = dir_read_buf(ip, da_bno, &bp, ops, &fixit);
2239                 if (error) {
2240                         do_warn(
2241         _("can't read data block %u for directory inode %" PRIu64 " error %d\n"),
2242                                 da_bno, ino, error);
2243                         *num_illegal += 1;
2244
2245                         /*
2246                          * we try to read all "data" blocks, but if we are in
2247                          * block form and we fail, there isn't anything else to
2248                          * read, and nothing we can do but trash it.
2249                          */
2250                         if (isblock) {
2251                                 fixit++;
2252                                 goto out_fix;
2253                         }
2254                         continue;
2255                 }
2256
2257                 /* check v5 metadata */
2258                 d = bp->b_addr;
2259                 if (be32_to_cpu(d->magic) == XFS_DIR3_BLOCK_MAGIC ||
2260                     be32_to_cpu(d->magic) == XFS_DIR3_DATA_MAGIC) {
2261                         error = check_dir3_header(mp, bp, ino);
2262                         if (error) {
2263                                 fixit++;
2264                                 if (isblock)
2265                                         goto out_fix;
2266                                 continue;
2267                         }
2268                 }
2269
2270                 longform_dir2_entry_check_data(mp, ip, num_illegal, need_dot,
2271                                 irec, ino_offset, bp, hashtab,
2272                                 &freetab, da_bno, isblock);
2273                 if (isblock)
2274                         break;
2275
2276                 libxfs_buf_relse(bp);
2277         }
2278         fixit |= (*num_illegal != 0) || dir2_is_badino(ino) || *need_dot;
2279
2280         if (!dotdot_update) {
2281                 /* check btree and freespace */
2282                 if (isblock) {
2283                         struct xfs_dir2_data_hdr *block;
2284                         xfs_dir2_block_tail_t   *btp;
2285                         xfs_dir2_leaf_entry_t   *blp;
2286
2287                         block = bp->b_addr;
2288                         btp = xfs_dir2_block_tail_p(mp->m_dir_geo, block);
2289                         blp = xfs_dir2_block_leaf_p(btp);
2290                         seeval = dir_hash_see_all(hashtab, blp,
2291                                                 be32_to_cpu(btp->count),
2292                                                 be32_to_cpu(btp->stale));
2293                         if (dir_hash_check(hashtab, ip, seeval))
2294                                 fixit |= 1;
2295                 } else if (isleaf) {
2296                         fixit |= longform_dir2_check_leaf(mp, ip, hashtab,
2297                                                                 freetab);
2298                 } else {
2299                         fixit |= longform_dir2_check_node(mp, ip, hashtab,
2300                                                                 freetab);
2301                 }
2302         }
2303 out_fix:
2304         if (isblock && bp)
2305                 libxfs_buf_relse(bp);
2306
2307         if (!no_modify && (fixit || dotdot_update)) {
2308                 longform_dir2_rebuild(mp, ino, ip, irec, ino_offset, hashtab);
2309                 *num_illegal = 0;
2310                 *need_dot = 0;
2311         } else {
2312                 if (fixit || dotdot_update)
2313                         do_warn(
2314         _("would rebuild directory inode %" PRIu64 "\n"), ino);
2315         }
2316
2317         free(freetab);
2318 }
2319
2320 /*
2321  * shortform directory v2 processing routines -- entry verification and
2322  * bad entry deletion (pruning).
2323  */
2324 static struct xfs_dir2_sf_entry *
2325 shortform_dir2_junk(
2326         struct xfs_mount        *mp,
2327         struct xfs_dir2_sf_hdr  *sfp,
2328         struct xfs_dir2_sf_entry *sfep,
2329         xfs_ino_t               lino,
2330         int                     *max_size,
2331         int                     *index,
2332         int                     *bytes_deleted,
2333         int                     *ino_dirty)
2334 {
2335         struct xfs_dir2_sf_entry *next_sfep;
2336         int                     next_len;
2337         int                     next_elen;
2338
2339         if (lino == orphanage_ino)
2340                 orphanage_ino = 0;
2341
2342         next_elen = libxfs_dir2_sf_entsize(mp, sfp, sfep->namelen);
2343         next_sfep = libxfs_dir2_sf_nextentry(mp, sfp, sfep);
2344
2345         /*
2346          * if we are just checking, simply return the pointer to the next entry
2347          * here so that the checking loop can continue.
2348          */
2349         if (no_modify) {
2350                 do_warn(_("would junk entry\n"));
2351                 return next_sfep;
2352         }
2353
2354         /*
2355          * now move all the remaining entries down over the junked entry and
2356          * clear the newly unused bytes at the tail of the directory region.
2357          */
2358         next_len = *max_size - ((intptr_t)next_sfep - (intptr_t)sfp);
2359         *max_size -= next_elen;
2360         *bytes_deleted += next_elen;
2361
2362         memmove(sfep, next_sfep, next_len);
2363         memset((void *)((intptr_t)sfep + next_len), 0, next_elen);
2364         sfp->count -= 1;
2365         *ino_dirty = 1;
2366
2367         /*
2368          * WARNING:  drop the index i by one so it matches the decremented count
2369          * for accurate comparisons in the loop test
2370          */
2371         (*index)--;
2372
2373         if (verbose)
2374                 do_warn(_("junking entry\n"));
2375         else
2376                 do_warn("\n");
2377         return sfep;
2378 }
2379
2380 static void
2381 shortform_dir2_entry_check(
2382         struct xfs_mount        *mp,
2383         xfs_ino_t               ino,
2384         struct xfs_inode        *ip,
2385         int                     *ino_dirty,
2386         struct ino_tree_node    *current_irec,
2387         int                     current_ino_offset,
2388         struct dir_hash_tab     *hashtab)
2389 {
2390         xfs_ino_t               lino;
2391         xfs_ino_t               parent;
2392         struct xfs_dir2_sf_hdr  *sfp;
2393         struct xfs_dir2_sf_entry *sfep;
2394         struct xfs_dir2_sf_entry *next_sfep;
2395         struct xfs_ifork        *ifp;
2396         struct ino_tree_node    *irec;
2397         int                     max_size;
2398         int                     ino_offset;
2399         int                     i;
2400         int                     bad_sfnamelen;
2401         int                     namelen;
2402         int                     bytes_deleted;
2403         char                    fname[MAXNAMELEN + 1];
2404         int                     i8;
2405
2406         ifp = &ip->i_df;
2407         sfp = (struct xfs_dir2_sf_hdr *) ifp->if_u1.if_data;
2408         *ino_dirty = 0;
2409         bytes_deleted = 0;
2410
2411         max_size = ifp->if_bytes;
2412         ASSERT(ip->i_d.di_size <= ifp->if_bytes);
2413
2414         /*
2415          * if just rebuild a directory due to a "..", update and return
2416          */
2417         if (dotdot_update) {
2418                 parent = get_inode_parent(current_irec, current_ino_offset);
2419                 if (no_modify) {
2420                         do_warn(
2421         _("would set .. in sf dir inode %" PRIu64 " to %" PRIu64 "\n"),
2422                                 ino, parent);
2423                 } else {
2424                         do_warn(
2425         _("setting .. in sf dir inode %" PRIu64 " to %" PRIu64 "\n"),
2426                                 ino, parent);
2427                         libxfs_dir2_sf_put_parent_ino(sfp, parent);
2428                         *ino_dirty = 1;
2429                 }
2430                 return;
2431         }
2432
2433         /*
2434          * no '.' entry in shortform dirs, just bump up ref count by 1
2435          * '..' was already (or will be) accounted for and checked when
2436          * the directory is reached or will be taken care of when the
2437          * directory is moved to orphanage.
2438          */
2439         add_inode_ref(current_irec, current_ino_offset);
2440
2441         /*
2442          * Initialise i8 counter -- the parent inode number counts as well.
2443          */
2444         i8 = libxfs_dir2_sf_get_parent_ino(sfp) > XFS_DIR2_MAX_SHORT_INUM;
2445
2446         /*
2447          * now run through entries, stop at first bad entry, don't need
2448          * to skip over '..' since that's encoded in its own field and
2449          * no need to worry about '.' since it doesn't exist.
2450          */
2451         sfep = next_sfep = xfs_dir2_sf_firstentry(sfp);
2452
2453         for (i = 0; i < sfp->count && max_size >
2454                                         (intptr_t)next_sfep - (intptr_t)sfp;
2455                         sfep = next_sfep, i++)  {
2456                 bad_sfnamelen = 0;
2457
2458                 lino = libxfs_dir2_sf_get_ino(mp, sfp, sfep);
2459
2460                 namelen = sfep->namelen;
2461
2462                 ASSERT(no_modify || namelen > 0);
2463
2464                 if (no_modify && namelen == 0)  {
2465                         /*
2466                          * if we're really lucky, this is
2467                          * the last entry in which case we
2468                          * can use the dir size to set the
2469                          * namelen value.  otherwise, forget
2470                          * it because we're not going to be
2471                          * able to find the next entry.
2472                          */
2473                         bad_sfnamelen = 1;
2474
2475                         if (i == sfp->count - 1)  {
2476                                 namelen = ip->i_d.di_size -
2477                                         ((intptr_t) &sfep->name[0] -
2478                                          (intptr_t) sfp);
2479                         } else  {
2480                                 /*
2481                                  * don't process the rest of the directory,
2482                                  * break out of processing loop
2483                                  */
2484                                 break;
2485                         }
2486                 } else if (no_modify && (intptr_t) sfep - (intptr_t) sfp +
2487                                 + libxfs_dir2_sf_entsize(mp, sfp, sfep->namelen)
2488                                 > ip->i_d.di_size)  {
2489                         bad_sfnamelen = 1;
2490
2491                         if (i == sfp->count - 1)  {
2492                                 namelen = ip->i_d.di_size -
2493                                         ((intptr_t) &sfep->name[0] -
2494                                          (intptr_t) sfp);
2495                         } else  {
2496                                 /*
2497                                  * don't process the rest of the directory,
2498                                  * break out of processing loop
2499                                  */
2500                                 break;
2501                         }
2502                 }
2503
2504                 memmove(fname, sfep->name, sfep->namelen);
2505                 fname[sfep->namelen] = '\0';
2506
2507                 ASSERT(no_modify || (lino != NULLFSINO && lino != 0));
2508                 ASSERT(no_modify || libxfs_verify_dir_ino(mp, lino));
2509
2510                 /*
2511                  * Also skip entries with bogus inode numbers if we're
2512                  * in no modify mode.
2513                  */
2514
2515                 if (no_modify && !libxfs_verify_dir_ino(mp, lino))  {
2516                         next_sfep = libxfs_dir2_sf_nextentry(mp, sfp, sfep);
2517                         continue;
2518                 }
2519
2520                 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, lino),
2521                                         XFS_INO_TO_AGINO(mp, lino));
2522
2523                 if (irec == NULL)  {
2524                         do_warn(
2525         _("entry \"%s\" in shortform directory %" PRIu64 " references non-existent inode %" PRIu64 "\n"),
2526                                 fname, ino, lino);
2527                         next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
2528                                                 &max_size, &i, &bytes_deleted,
2529                                                 ino_dirty);
2530                         continue;
2531                 }
2532
2533                 ino_offset = XFS_INO_TO_AGINO(mp, lino) - irec->ino_startnum;
2534
2535                 /*
2536                  * if it's a free inode, blow out the entry.
2537                  * by now, any inode that we think is free
2538                  * really is free.
2539                  */
2540                 if (is_inode_free(irec, ino_offset))  {
2541                         do_warn(
2542         _("entry \"%s\" in shortform directory inode %" PRIu64 " points to free inode %" PRIu64 "\n"),
2543                                 fname, ino, lino);
2544                         next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
2545                                                 &max_size, &i, &bytes_deleted,
2546                                                 ino_dirty);
2547                         continue;
2548                 }
2549                 /*
2550                  * check if this inode is lost+found dir in the root
2551                  */
2552                 if (ino == mp->m_sb.sb_rootino && strcmp(fname, ORPHANAGE) == 0) {
2553                         /*
2554                          * if it's not a directory, trash it
2555                          */
2556                         if (!inode_isadir(irec, ino_offset)) {
2557                                 do_warn(
2558         _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"),
2559                                         ORPHANAGE, lino, ino);
2560                                 next_sfep = shortform_dir2_junk(mp, sfp, sfep,
2561                                                 lino, &max_size, &i,
2562                                                 &bytes_deleted, ino_dirty);
2563                                 continue;
2564                         }
2565                         /*
2566                          * if this is a dup, it will be picked up below,
2567                          * otherwise, mark it as the orphanage for later.
2568                          */
2569                         if (!orphanage_ino)
2570                                 orphanage_ino = lino;
2571                 }
2572                 /*
2573                  * check for duplicate names in directory.
2574                  */
2575                 if (!dir_hash_add(mp, hashtab, (xfs_dir2_dataptr_t)
2576                                 (sfep - xfs_dir2_sf_firstentry(sfp)),
2577                                 lino, sfep->namelen, sfep->name,
2578                                 libxfs_dir2_sf_get_ftype(mp, sfep))) {
2579                         do_warn(
2580 _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"),
2581                                 fname, lino, ino);
2582                         next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
2583                                                 &max_size, &i, &bytes_deleted,
2584                                                 ino_dirty);
2585                         continue;
2586                 }
2587
2588                 if (!inode_isadir(irec, ino_offset))  {
2589                         /*
2590                          * check easy case first, regular inode, just bump
2591                          * the link count
2592                          */
2593                         add_inode_reached(irec, ino_offset);
2594                 } else  {
2595                         parent = get_inode_parent(irec, ino_offset);
2596
2597                         /*
2598                          * bump up the link counts in parent and child.
2599                          * directory but if the link doesn't agree with
2600                          * the .. in the child, blow out the entry
2601                          */
2602                         if (is_inode_reached(irec, ino_offset))  {
2603                                 do_warn(
2604         _("entry \"%s\" in directory inode %" PRIu64
2605           " references already connected inode %" PRIu64 ".\n"),
2606                                         fname, ino, lino);
2607                                 next_sfep = shortform_dir2_junk(mp, sfp, sfep,
2608                                                 lino, &max_size, &i,
2609                                                 &bytes_deleted, ino_dirty);
2610                                 continue;
2611                         } else if (parent == ino)  {
2612                                 add_inode_reached(irec, ino_offset);
2613                                 add_inode_ref(current_irec, current_ino_offset);
2614                         } else if (parent == NULLFSINO) {
2615                                 /* ".." was missing, but this entry refers to it,
2616                                 so, set it as the parent and mark for rebuild */
2617                                 do_warn(
2618         _("entry \"%s\" in dir ino %" PRIu64 " doesn't have a .. entry, will set it in ino %" PRIu64 ".\n"),
2619                                         fname, ino, lino);
2620                                 set_inode_parent(irec, ino_offset, ino);
2621                                 add_inode_reached(irec, ino_offset);
2622                                 add_inode_ref(current_irec, current_ino_offset);
2623                                 add_dotdot_update(XFS_INO_TO_AGNO(mp, lino),
2624                                                         irec, ino_offset);
2625                         } else  {
2626                                 do_warn(
2627         _("entry \"%s\" in directory inode %" PRIu64
2628           " not consistent with .. value (%" PRIu64
2629           ") in inode %" PRIu64 ",\n"),
2630                                         fname, ino, parent, lino);
2631                                 next_sfep = shortform_dir2_junk(mp, sfp, sfep,
2632                                                 lino, &max_size, &i,
2633                                                 &bytes_deleted, ino_dirty);
2634                                 continue;
2635                         }
2636                 }
2637
2638                 /* validate ftype field if supported */
2639                 if (xfs_sb_version_hasftype(&mp->m_sb)) {
2640                         uint8_t dir_ftype;
2641                         uint8_t ino_ftype;
2642
2643                         dir_ftype = libxfs_dir2_sf_get_ftype(mp, sfep);
2644                         ino_ftype = get_inode_ftype(irec, ino_offset);
2645
2646                         if (dir_ftype != ino_ftype) {
2647                                 if (no_modify) {
2648                                         do_warn(
2649         _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
2650                                                 dir_ftype, ino_ftype,
2651                                                 ino, lino);
2652                                 } else {
2653                                         do_warn(
2654         _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
2655                                                 dir_ftype, ino_ftype,
2656                                                 ino, lino);
2657                                         libxfs_dir2_sf_put_ftype(mp, sfep,
2658                                                                 ino_ftype);
2659                                         dir_hash_update_ftype(hashtab,
2660                         (xfs_dir2_dataptr_t)(sfep - xfs_dir2_sf_firstentry(sfp)),
2661                                                               ino_ftype);
2662                                         *ino_dirty = 1;
2663                                 }
2664                         }
2665                 }
2666
2667                 if (lino > XFS_DIR2_MAX_SHORT_INUM)
2668                         i8++;
2669
2670                 /*
2671                  * go onto next entry - we have to take entries with bad namelen
2672                  * into account in no modify mode since we calculate size based
2673                  * on next_sfep.
2674                  */
2675                 ASSERT(no_modify || bad_sfnamelen == 0);
2676                 next_sfep = (struct xfs_dir2_sf_entry *)((intptr_t)sfep +
2677                               (bad_sfnamelen
2678                                 ? libxfs_dir2_sf_entsize(mp, sfp, namelen)
2679                                 : libxfs_dir2_sf_entsize(mp, sfp, sfep->namelen)));
2680         }
2681
2682         if (sfp->i8count != i8) {
2683                 if (no_modify) {
2684                         do_warn(_("would fix i8count in inode %" PRIu64 "\n"),
2685                                 ino);
2686                 } else {
2687                         if (i8 == 0) {
2688                                 struct xfs_dir2_sf_entry *tmp_sfep;
2689
2690                                 tmp_sfep = next_sfep;
2691                                 process_sf_dir2_fixi8(mp, sfp, &tmp_sfep);
2692                                 bytes_deleted +=
2693                                         (intptr_t)next_sfep -
2694                                         (intptr_t)tmp_sfep;
2695                                 next_sfep = tmp_sfep;
2696                         } else
2697                                 sfp->i8count = i8;
2698                         *ino_dirty = 1;
2699                         do_warn(_("fixing i8count in inode %" PRIu64 "\n"),
2700                                 ino);
2701                 }
2702         }
2703
2704         /*
2705          * sync up sizes if required
2706          */
2707         if (*ino_dirty && bytes_deleted > 0)  {
2708                 ASSERT(!no_modify);
2709                 libxfs_idata_realloc(ip, -bytes_deleted, XFS_DATA_FORK);
2710                 ip->i_d.di_size -= bytes_deleted;
2711         }
2712
2713         if (ip->i_d.di_size != ip->i_df.if_bytes)  {
2714                 ASSERT(ip->i_df.if_bytes == (xfs_fsize_t)
2715                                 ((intptr_t) next_sfep - (intptr_t) sfp));
2716                 ip->i_d.di_size = (xfs_fsize_t)
2717                                 ((intptr_t) next_sfep - (intptr_t) sfp);
2718                 do_warn(
2719         _("setting size to %" PRId64 " bytes to reflect junked entries\n"),
2720                         ip->i_d.di_size);
2721                 *ino_dirty = 1;
2722         }
2723 }
2724
2725 /*
2726  * processes all reachable inodes in directories
2727  */
2728 static void
2729 process_dir_inode(
2730         struct xfs_mount        *mp,
2731         xfs_agnumber_t          agno,
2732         struct ino_tree_node    *irec,
2733         int                     ino_offset)
2734 {
2735         xfs_ino_t               ino;
2736         struct xfs_inode        *ip;
2737         struct xfs_trans        *tp;
2738         struct dir_hash_tab     *hashtab;
2739         int                     need_dot;
2740         int                     dirty, num_illegal, error, nres;
2741
2742         ino = XFS_AGINO_TO_INO(mp, agno, irec->ino_startnum + ino_offset);
2743
2744         /*
2745          * open up directory inode, check all entries,
2746          * then call prune_dir_entries to remove all
2747          * remaining illegal directory entries.
2748          */
2749
2750         ASSERT(!is_inode_refchecked(irec, ino_offset) || dotdot_update);
2751
2752         error = -libxfs_iget(mp, NULL, ino, 0, &ip);
2753         if (error) {
2754                 if (!no_modify)
2755                         do_error(
2756         _("couldn't map inode %" PRIu64 ", err = %d\n"),
2757                                 ino, error);
2758                 else  {
2759                         do_warn(
2760         _("couldn't map inode %" PRIu64 ", err = %d\n"),
2761                                 ino, error);
2762                         /*
2763                          * see below for what we're doing if this
2764                          * is root.  Why do we need to do this here?
2765                          * to ensure that the root doesn't show up
2766                          * as being disconnected in the no_modify case.
2767                          */
2768                         if (mp->m_sb.sb_rootino == ino)  {
2769                                 add_inode_reached(irec, 0);
2770                                 add_inode_ref(irec, 0);
2771                         }
2772                 }
2773
2774                 add_inode_refchecked(irec, 0);
2775                 return;
2776         }
2777
2778         need_dot = dirty = num_illegal = 0;
2779
2780         if (mp->m_sb.sb_rootino == ino)  {
2781                 /*
2782                  * mark root inode reached and bump up
2783                  * link count for root inode to account
2784                  * for '..' entry since the root inode is
2785                  * never reached by a parent.  we know
2786                  * that root's '..' is always good --
2787                  * guaranteed by phase 3 and/or below.
2788                  */
2789                 add_inode_reached(irec, ino_offset);
2790         }
2791
2792         add_inode_refchecked(irec, ino_offset);
2793
2794         hashtab = dir_hash_init(ip->i_d.di_size);
2795
2796         /*
2797          * look for bogus entries
2798          */
2799         switch (ip->i_df.if_format)  {
2800                 case XFS_DINODE_FMT_EXTENTS:
2801                 case XFS_DINODE_FMT_BTREE:
2802                         /*
2803                          * also check for missing '.' in longform dirs.
2804                          * missing .. entries are added if required when
2805                          * the directory is connected to lost+found. but
2806                          * we need to create '.' entries here.
2807                          */
2808                         longform_dir2_entry_check(mp, ino, ip,
2809                                                 &num_illegal, &need_dot,
2810                                                 irec, ino_offset,
2811                                                 hashtab);
2812                         break;
2813
2814                 case XFS_DINODE_FMT_LOCAL:
2815                         /*
2816                          * using the remove reservation is overkill
2817                          * since at most we'll only need to log the
2818                          * inode but it's easier than wedging a
2819                          * new define in ourselves.
2820                          */
2821                         nres = no_modify ? 0 : XFS_REMOVE_SPACE_RES(mp);
2822                         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove,
2823                                                     nres, 0, 0, &tp);
2824                         if (error)
2825                                 res_failed(error);
2826
2827                         libxfs_trans_ijoin(tp, ip, 0);
2828
2829                         shortform_dir2_entry_check(mp, ino, ip, &dirty,
2830                                                 irec, ino_offset,
2831                                                 hashtab);
2832
2833                         ASSERT(dirty == 0 || (dirty && !no_modify));
2834                         if (dirty)  {
2835                                 libxfs_trans_log_inode(tp, ip,
2836                                         XFS_ILOG_CORE | XFS_ILOG_DDATA);
2837                                 error = -libxfs_trans_commit(tp);
2838                                 if (error)
2839                                         do_error(
2840 _("error %d fixing shortform directory %llu\n"),
2841                                                 error,
2842                                                 (unsigned long long)ip->i_ino);
2843                         } else  {
2844                                 libxfs_trans_cancel(tp);
2845                         }
2846                         break;
2847
2848                 default:
2849                         break;
2850         }
2851         dir_hash_done(hashtab);
2852
2853         /*
2854          * if we have to create a .. for /, do it now *before*
2855          * we delete the bogus entries, otherwise the directory
2856          * could transform into a shortform dir which would
2857          * probably cause the simulation to choke.  Even
2858          * if the illegal entries get shifted around, it's ok
2859          * because the entries are structurally intact and in
2860          * in hash-value order so the simulation won't get confused
2861          * if it has to move them around.
2862          */
2863         if (!no_modify && need_root_dotdot && ino == mp->m_sb.sb_rootino)  {
2864                 ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_LOCAL);
2865
2866                 do_warn(_("recreating root directory .. entry\n"));
2867
2868                 nres = XFS_MKDIR_SPACE_RES(mp, 2);
2869                 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir,
2870                                             nres, 0, 0, &tp);
2871                 if (error)
2872                         res_failed(error);
2873
2874                 libxfs_trans_ijoin(tp, ip, 0);
2875
2876                 error = -libxfs_dir_createname(tp, ip, &xfs_name_dotdot,
2877                                         ip->i_ino, nres);
2878                 if (error)
2879                         do_error(
2880         _("can't make \"..\" entry in root inode %" PRIu64 ", createname error %d\n"), ino, error);
2881
2882                 libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2883                 error = -libxfs_trans_commit(tp);
2884                 if (error)
2885                         do_error(
2886         _("root inode \"..\" entry recreation failed (%d)\n"), error);
2887
2888                 need_root_dotdot = 0;
2889         } else if (need_root_dotdot && ino == mp->m_sb.sb_rootino)  {
2890                 do_warn(_("would recreate root directory .. entry\n"));
2891         }
2892
2893         /*
2894          * if we need to create the '.' entry, do so only if
2895          * the directory is a longform dir.  if it's been
2896          * turned into a shortform dir, then the inode is ok
2897          * since shortform dirs have no '.' entry and the inode
2898          * has already been committed by prune_lf_dir_entry().
2899          */
2900         if (need_dot)  {
2901                 /*
2902                  * bump up our link count but don't
2903                  * bump up the inode link count.  chances
2904                  * are good that even though we lost '.'
2905                  * the inode link counts reflect '.' so
2906                  * leave the inode link count alone and if
2907                  * it turns out to be wrong, we'll catch
2908                  * that in phase 7.
2909                  */
2910                 add_inode_ref(irec, ino_offset);
2911
2912                 if (no_modify)  {
2913                         do_warn(
2914         _("would create missing \".\" entry in dir ino %" PRIu64 "\n"),
2915                                 ino);
2916                 } else if (ip->i_df.if_format != XFS_DINODE_FMT_LOCAL)  {
2917                         /*
2918                          * need to create . entry in longform dir.
2919                          */
2920                         do_warn(
2921         _("creating missing \".\" entry in dir ino %" PRIu64 "\n"), ino);
2922
2923                         nres = XFS_MKDIR_SPACE_RES(mp, 1);
2924                         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir,
2925                                                     nres, 0, 0, &tp);
2926                         if (error)
2927                                 res_failed(error);
2928
2929                         libxfs_trans_ijoin(tp, ip, 0);
2930
2931                         error = -libxfs_dir_createname(tp, ip, &xfs_name_dot,
2932                                         ip->i_ino, nres);
2933                         if (error)
2934                                 do_error(
2935         _("can't make \".\" entry in dir ino %" PRIu64 ", createname error %d\n"),
2936                                         ino, error);
2937
2938                         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2939                         error = -libxfs_trans_commit(tp);
2940                         if (error)
2941                                 do_error(
2942         _("root inode \".\" entry recreation failed (%d)\n"), error);
2943                 }
2944         }
2945         libxfs_irele(ip);
2946 }
2947
2948 /*
2949  * mark realtime bitmap and summary inodes as reached.
2950  * quota inode will be marked here as well
2951  */
2952 static void
2953 mark_standalone_inodes(xfs_mount_t *mp)
2954 {
2955         ino_tree_node_t         *irec;
2956         int                     offset;
2957
2958         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rbmino),
2959                         XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rbmino));
2960
2961         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rbmino) -
2962                         irec->ino_startnum;
2963
2964         add_inode_reached(irec, offset);
2965
2966         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rsumino),
2967                         XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rsumino));
2968
2969         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rsumino) -
2970                         irec->ino_startnum;
2971
2972         add_inode_reached(irec, offset);
2973
2974         if (fs_quotas)  {
2975                 if (mp->m_sb.sb_uquotino
2976                                 && mp->m_sb.sb_uquotino != NULLFSINO)  {
2977                         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
2978                                                 mp->m_sb.sb_uquotino),
2979                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino));
2980                         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino)
2981                                         - irec->ino_startnum;
2982                         add_inode_reached(irec, offset);
2983                 }
2984                 if (mp->m_sb.sb_gquotino
2985                                 && mp->m_sb.sb_gquotino != NULLFSINO)  {
2986                         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
2987                                                 mp->m_sb.sb_gquotino),
2988                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_gquotino));
2989                         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_gquotino)
2990                                         - irec->ino_startnum;
2991                         add_inode_reached(irec, offset);
2992                 }
2993                 if (mp->m_sb.sb_pquotino
2994                                 && mp->m_sb.sb_pquotino != NULLFSINO)  {
2995                         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
2996                                                 mp->m_sb.sb_pquotino),
2997                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino));
2998                         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino)
2999                                         - irec->ino_startnum;
3000                         add_inode_reached(irec, offset);
3001                 }
3002         }
3003 }
3004
3005 static void
3006 check_for_orphaned_inodes(
3007         xfs_mount_t             *mp,
3008         xfs_agnumber_t          agno,
3009         ino_tree_node_t         *irec)
3010 {
3011         int                     i;
3012         xfs_ino_t               ino;
3013
3014         for (i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
3015                 ASSERT(is_inode_confirmed(irec, i));
3016                 if (is_inode_free(irec, i))
3017                         continue;
3018
3019                 if (is_inode_reached(irec, i))
3020                         continue;
3021
3022                 ASSERT(inode_isadir(irec, i) ||
3023                         num_inode_references(irec, i) == 0);
3024
3025                 ino = XFS_AGINO_TO_INO(mp, agno, i + irec->ino_startnum);
3026                 if (inode_isadir(irec, i))
3027                         do_warn(_("disconnected dir inode %" PRIu64 ", "), ino);
3028                 else
3029                         do_warn(_("disconnected inode %" PRIu64 ", "), ino);
3030                 if (!no_modify)  {
3031                         if (!orphanage_ino)
3032                                 orphanage_ino = mk_orphanage(mp);
3033                         do_warn(_("moving to %s\n"), ORPHANAGE);
3034                         mv_orphanage(mp, ino, inode_isadir(irec, i));
3035                 } else  {
3036                         do_warn(_("would move to %s\n"), ORPHANAGE);
3037                 }
3038                 /*
3039                  * for read-only case, even though the inode isn't
3040                  * really reachable, set the flag (and bump our link
3041                  * count) anyway to fool phase 7
3042                  */
3043                 add_inode_reached(irec, i);
3044         }
3045 }
3046
3047 static void
3048 do_dir_inode(
3049         struct workqueue        *wq,
3050         xfs_agnumber_t          agno,
3051         void                    *arg)
3052 {
3053         struct ino_tree_node    *irec = arg;
3054         int                     i;
3055
3056         for (i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
3057                 if (inode_isadir(irec, i))
3058                         process_dir_inode(wq->wq_ctx, agno, irec, i);
3059         }
3060 }
3061
3062 static void
3063 traverse_function(
3064         struct workqueue        *wq,
3065         xfs_agnumber_t          agno,
3066         void                    *arg)
3067 {
3068         struct ino_tree_node    *irec;
3069         prefetch_args_t         *pf_args = arg;
3070         struct workqueue        lwq;
3071         struct xfs_mount        *mp = wq->wq_ctx;
3072
3073         wait_for_inode_prefetch(pf_args);
3074
3075         if (verbose)
3076                 do_log(_("        - agno = %d\n"), agno);
3077
3078         /*
3079          * The more AGs we have in flight at once, the fewer processing threads
3080          * per AG. This means we don't overwhelm the machine with hundreds of
3081          * threads when we start acting on lots of AGs at once. We just want
3082          * enough that we can keep multiple CPUs busy across multiple AGs.
3083          */
3084         workqueue_create_bound(&lwq, mp, ag_stride, 1000);
3085
3086         for (irec = findfirst_inode_rec(agno); irec; irec = next_ino_rec(irec)) {
3087                 if (irec->ino_isa_dir == 0)
3088                         continue;
3089
3090                 if (pf_args) {
3091                         sem_post(&pf_args->ra_count);
3092 #ifdef XR_PF_TRACE
3093                         {
3094                         int     i;
3095                         sem_getvalue(&pf_args->ra_count, &i);
3096                         pftrace(
3097                 "processing inode chunk %p in AG %d (sem count = %d)",
3098                                 irec, agno, i);
3099                         }
3100 #endif
3101                 }
3102
3103                 queue_work(&lwq, do_dir_inode, agno, irec);
3104         }
3105         destroy_work_queue(&lwq);
3106         cleanup_inode_prefetch(pf_args);
3107 }
3108
3109 static void
3110 update_missing_dotdot_entries(
3111         xfs_mount_t             *mp)
3112 {
3113         dotdot_update_t         *dir;
3114
3115         /*
3116          * these entries parents were updated, rebuild them again
3117          * set dotdot_update flag so processing routines do not count links
3118          */
3119         dotdot_update = 1;
3120         while (!list_empty(&dotdot_update_list)) {
3121                 dir = list_entry(dotdot_update_list.prev, struct dotdot_update,
3122                                  list);
3123                 list_del(&dir->list);
3124                 process_dir_inode(mp, dir->agno, dir->irec, dir->ino_offset);
3125                 free(dir);
3126         }
3127 }
3128
/*
 * Start the directory traversal by handing traverse_function to
 * do_inode_prefetch() together with the global ag_stride.
 *
 * NOTE(review): the two trailing bool arguments are positional; their
 * meaning is defined by do_inode_prefetch(), which is not visible here -
 * confirm against its prototype.
 */
static void
traverse_ags(
        struct xfs_mount        *mp)
{
        do_inode_prefetch(mp, ag_stride, traverse_function, false, true);
}
3135
3136 void
3137 phase6(xfs_mount_t *mp)
3138 {
3139         ino_tree_node_t         *irec;
3140         int                     i;
3141
3142         memset(&zerocr, 0, sizeof(struct cred));
3143         memset(&zerofsx, 0, sizeof(struct fsxattr));
3144         orphanage_ino = 0;
3145
3146         do_log(_("Phase 6 - check inode connectivity...\n"));
3147
3148         incore_ext_teardown(mp);
3149
3150         add_ino_ex_data(mp);
3151
3152         /*
3153          * verify existence of root directory - if we have to
3154          * make one, it's ok for the incore data structs not to
3155          * know about it since everything about it (and the other
3156          * inodes in its chunk if a new chunk was created) are ok
3157          */
3158         if (need_root_inode)  {
3159                 if (!no_modify)  {
3160                         do_warn(_("reinitializing root directory\n"));
3161                         mk_root_dir(mp);
3162                         need_root_inode = 0;
3163                         need_root_dotdot = 0;
3164                 } else  {
3165                         do_warn(_("would reinitialize root directory\n"));
3166                 }
3167         }
3168
3169         if (need_rbmino)  {
3170                 if (!no_modify)  {
3171                         do_warn(_("reinitializing realtime bitmap inode\n"));
3172                         mk_rbmino(mp);
3173                         need_rbmino = 0;
3174                 } else  {
3175                         do_warn(_("would reinitialize realtime bitmap inode\n"));
3176                 }
3177         }
3178
3179         if (need_rsumino)  {
3180                 if (!no_modify)  {
3181                         do_warn(_("reinitializing realtime summary inode\n"));
3182                         mk_rsumino(mp);
3183                         need_rsumino = 0;
3184                 } else  {
3185                         do_warn(_("would reinitialize realtime summary inode\n"));
3186                 }
3187         }
3188
3189         if (!no_modify)  {
3190                 do_log(
3191 _("        - resetting contents of realtime bitmap and summary inodes\n"));
3192                 if (fill_rbmino(mp))  {
3193                         do_warn(
3194                         _("Warning:  realtime bitmap may be inconsistent\n"));
3195                 }
3196
3197                 if (fill_rsumino(mp))  {
3198                         do_warn(
3199                         _("Warning:  realtime bitmap may be inconsistent\n"));
3200                 }
3201         }
3202
3203         mark_standalone_inodes(mp);
3204
3205         do_log(_("        - traversing filesystem ...\n"));
3206
3207         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
3208                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
3209
3210         /*
3211          * we always have a root inode, even if it's free...
3212          * if the root is free, forget it, lost+found is already gone
3213          */
3214         if (is_inode_free(irec, 0) || !inode_isadir(irec, 0))  {
3215                 need_root_inode = 1;
3216         }
3217
3218         /*
3219          * then process all inodes by walking incore inode tree
3220          */
3221         traverse_ags(mp);
3222
3223         /*
3224          * any directories that had updated ".." entries, rebuild them now
3225          */
3226         update_missing_dotdot_entries(mp);
3227
3228         do_log(_("        - traversal finished ...\n"));
3229         do_log(_("        - moving disconnected inodes to %s ...\n"),
3230                 ORPHANAGE);
3231
3232         /*
3233          * move all disconnected inodes to the orphanage
3234          */
3235         for (i = 0; i < glob_agcount; i++)  {
3236                 irec = findfirst_inode_rec(i);
3237                 while (irec != NULL)  {
3238                         check_for_orphaned_inodes(mp, i, irec);
3239                         irec = next_ino_rec(irec);
3240                 }
3241         }
3242 }