repair/phase6.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
   4  * All Rights Reserved.
   5  */
   6
   7 #include "libxfs.h"
   8 #include "threads.h"
   9 #include "threads.h"
  10 #include "prefetch.h"
  11 #include "avl.h"
  12 #include "globals.h"
  13 #include "agheader.h"
  14 #include "incore.h"
  15 #include "dir2.h"
  16 #include "protos.h"
  17 #include "err_protos.h"
  18 #include "dinode.h"
  19 #include "progress.h"
  20 #include "versions.h"
  21
  22 static struct cred              zerocr;
  23 static struct fsxattr           zerofsx;
  24 static xfs_ino_t                orphanage_ino;
  25
  26 static struct xfs_name          xfs_name_dot = {(unsigned char *)".",
  27                                                 1,
  28                                                 XFS_DIR3_FT_DIR};
  29
  30 /*
  31  * Data structures used to keep track of directories where the ".."
  32  * entries are updated. These must be rebuilt after the initial pass
  33  */
  34 typedef struct dotdot_update {
  35         struct list_head        list;
  36         ino_tree_node_t         *irec;
  37         xfs_agnumber_t          agno;
  38         int                     ino_offset;
  39 } dotdot_update_t;
  40
  41 static LIST_HEAD(dotdot_update_list);
  42 static int                      dotdot_update;
  43
  44 static void
  45 add_dotdot_update(
  46         xfs_agnumber_t          agno,
  47         ino_tree_node_t         *irec,
  48         int                     ino_offset)
  49 {
  50         dotdot_update_t         *dir = malloc(sizeof(dotdot_update_t));
  51
  52         if (!dir)
  53                 do_error(_("malloc failed add_dotdot_update (%zu bytes)\n"),
  54                         sizeof(dotdot_update_t));
  55
  56         INIT_LIST_HEAD(&dir->list);
  57         dir->irec = irec;
  58         dir->agno = agno;
  59         dir->ino_offset = ino_offset;
  60
  61         list_add(&dir->list, &dotdot_update_list);
  62 }
  63
  64 /*
  65  * Data structures and routines to keep track of directory entries
  66  * and whether their leaf entry has been seen. Also used for name
  67  * duplicate checking and rebuilding step if required.
  68  */
  69 struct dir_hash_ent {
  70         struct dir_hash_ent     *nextbyhash;    /* next in name bucket */
  71         struct dir_hash_ent     *nextbyorder;   /* next in order added */
  72         xfs_dahash_t            hashval;        /* hash value of name */
  73         uint32_t                address;        /* offset of data entry */
  74         xfs_ino_t               inum;           /* inode num of entry */
  75         short                   junkit;         /* name starts with / */
  76         short                   seen;           /* have seen leaf entry */
  77         struct xfs_name         name;
  78         unsigned char           namebuf[];
  79 };
  80
  81 struct dir_hash_tab {
  82         int                     size;           /* size of hash tables */
  83         struct dir_hash_ent     *first;         /* ptr to first added entry */
  84         struct dir_hash_ent     *last;          /* ptr to last added entry */
  85         struct dir_hash_ent     **byhash;       /* ptr to name hash buckets */
  86 #define HT_UNSEEN               1
  87         struct radix_tree_root  byaddr;
  88 };
  89
  90 #define DIR_HASH_TAB_SIZE(n)    \
  91         (sizeof(struct dir_hash_tab) + (sizeof(struct dir_hash_ent *) * (n)))
  92 #define DIR_HASH_FUNC(t,a)      ((a) % (t)->size)
  93
  94 /*
  95  * Track the contents of the freespace table in a directory.
  96  */
  97 typedef struct freetab {
  98         int                     naents; /* expected number of data blocks */
  99         int                     nents;  /* number of data blocks processed */
 100         struct freetab_ent {
 101                 xfs_dir2_data_off_t     v;
 102                 short                   s;
 103         } ents[1];
 104 } freetab_t;
 105 #define FREETAB_SIZE(n) \
 106         (offsetof(freetab_t, ents) + (sizeof(struct freetab_ent) * (n)))
 107
 108 #define DIR_HASH_CK_OK          0
 109 #define DIR_HASH_CK_DUPLEAF     1
 110 #define DIR_HASH_CK_BADHASH     2
 111 #define DIR_HASH_CK_NODATA      3
 112 #define DIR_HASH_CK_NOLEAF      4
 113 #define DIR_HASH_CK_BADSTALE    5
 114 #define DIR_HASH_CK_TOTAL       6
 115
 116 /*
 117  * Need to handle CRC and validation errors specially here. If there is a
 118  * validator error, re-read without the verifier so that we get a buffer we can
 119  * check and repair. Re-attach the ops to the buffer after the read so that when
 120  * it is rewritten the CRC is recalculated.
 121  *
 122  * If the buffer was not read, we return an error. If the buffer was read but
 123  * had a CRC or corruption error, we reread it without the verifier and if it is
 124  * read successfully we increment *crc_error and return 0. Otherwise we
 125  * return the read error.
 126  */
 127 static int
 128 dir_read_buf(
 129         struct xfs_inode        *ip,
 130         xfs_dablk_t             bno,
 131         struct xfs_buf          **bpp,
 132         const struct xfs_buf_ops *ops,
 133         int                     *crc_error)
 134 {
 135         int error;
 136         int error2;
 137
 138         error = -libxfs_da_read_buf(NULL, ip, bno, 0, bpp, XFS_DATA_FORK, ops);
 139
 140         if (error != EFSBADCRC && error != EFSCORRUPTED)
 141                 return error;
 142
 143         error2 = -libxfs_da_read_buf(NULL, ip, bno, 0, bpp, XFS_DATA_FORK,
 144                         NULL);
 145         if (error2)
 146                 return error2;
 147
 148         (*crc_error)++;
 149         (*bpp)->b_ops = ops;
 150         return 0;
 151 }
 152
 153 /*
 154  * Returns 0 if the name already exists (ie. a duplicate)
 155  */
 156 static int
 157 dir_hash_add(
 158         struct xfs_mount        *mp,
 159         struct dir_hash_tab     *hashtab,
 160         uint32_t                addr,
 161         xfs_ino_t               inum,
 162         int                     namelen,
 163         unsigned char           *name,
 164         uint8_t                 ftype)
 165 {
 166         xfs_dahash_t            hash = 0;
 167         int                     byhash = 0;
 168         struct dir_hash_ent     *p;
 169         int                     dup;
 170         short                   junk;
 171         struct xfs_name         xname;
 172         int                     error;
 173
 174         xname.name = name;
 175         xname.len = namelen;
 176         xname.type = ftype;
 177
 178         junk = name[0] == '/';
 179         dup = 0;
 180
 181         if (!junk) {
 182                 hash = libxfs_dir2_hashname(mp, &xname);
 183                 byhash = DIR_HASH_FUNC(hashtab, hash);
 184
 185                 /*
 186                  * search hash bucket for existing name.
 187                  */
 188                 for (p = hashtab->byhash[byhash]; p; p = p->nextbyhash) {
 189                         if (p->hashval == hash && p->name.len == namelen) {
 190                                 if (memcmp(p->name.name, name, namelen) == 0) {
 191                                         dup = 1;
 192                                         junk = 1;
 193                                         break;
 194                                 }
 195                         }
 196                 }
 197         }
 198
 199         /*
 200          * Allocate enough space for the hash entry and the name in a single
 201          * allocation so we can store our own copy of the name for later use.
 202          */
 203         p = calloc(1, sizeof(*p) + namelen + 1);
 204         if (!p)
 205                 do_error(_("malloc failed in dir_hash_add (%zu bytes)\n"),
 206                         sizeof(*p));
 207
 208         error = radix_tree_insert(&hashtab->byaddr, addr, p);
 209         if (error == EEXIST) {
 210                 do_warn(_("duplicate addrs %u in directory!\n"), addr);
 211                 free(p);
 212                 return 0;
 213         }
 214         radix_tree_tag_set(&hashtab->byaddr, addr, HT_UNSEEN);
 215
 216         if (hashtab->last)
 217                 hashtab->last->nextbyorder = p;
 218         else
 219                 hashtab->first = p;
 220         p->nextbyorder = NULL;
 221         hashtab->last = p;
 222
 223         if (!(p->junkit = junk)) {
 224                 p->hashval = hash;
 225                 p->nextbyhash = hashtab->byhash[byhash];
 226                 hashtab->byhash[byhash] = p;
 227         }
 228         p->address = addr;
 229         p->inum = inum;
 230         p->seen = 0;
 231
 232         /* Set up the name in the region trailing the hash entry. */
 233         memcpy(p->namebuf, name, namelen);
 234         p->name.name = p->namebuf;
 235         p->name.len = namelen;
 236         p->name.type = ftype;
 237         return !dup;
 238 }
 239
 240 /* Mark an existing directory hashtable entry as junk. */
 241 static void
 242 dir_hash_junkit(
 243         struct dir_hash_tab     *hashtab,
 244         xfs_dir2_dataptr_t      addr)
 245 {
 246         struct dir_hash_ent     *p;
 247
 248         p = radix_tree_lookup(&hashtab->byaddr, addr);
 249         assert(p != NULL);
 250
 251         p->junkit = 1;
 252         p->namebuf[0] = '/';
 253 }
 254
 255 static int
 256 dir_hash_check(
 257         struct dir_hash_tab     *hashtab,
 258         struct xfs_inode        *ip,
 259         int                     seeval)
 260 {
 261         static char             *seevalstr[DIR_HASH_CK_TOTAL];
 262         static int              done;
 263
 264         if (!done) {
 265                 seevalstr[DIR_HASH_CK_OK] = _("ok");
 266                 seevalstr[DIR_HASH_CK_DUPLEAF] = _("duplicate leaf");
 267                 seevalstr[DIR_HASH_CK_BADHASH] = _("hash value mismatch");
 268                 seevalstr[DIR_HASH_CK_NODATA] = _("no data entry");
 269                 seevalstr[DIR_HASH_CK_NOLEAF] = _("no leaf entry");
 270                 seevalstr[DIR_HASH_CK_BADSTALE] = _("bad stale count");
 271                 done = 1;
 272         }
 273
 274         if (seeval == DIR_HASH_CK_OK &&
 275             radix_tree_tagged(&hashtab->byaddr, HT_UNSEEN))
 276                 seeval = DIR_HASH_CK_NOLEAF;
 277         if (seeval == DIR_HASH_CK_OK)
 278                 return 0;
 279         do_warn(_("bad hash table for directory inode %" PRIu64 " (%s): "),
 280                 ip->i_ino, seevalstr[seeval]);
 281         if (!no_modify)
 282                 do_warn(_("rebuilding\n"));
 283         else
 284                 do_warn(_("would rebuild\n"));
 285         return 1;
 286 }
 287
 288 static void
 289 dir_hash_done(
 290         struct dir_hash_tab     *hashtab)
 291 {
 292         int                     i;
 293         struct dir_hash_ent     *n;
 294         struct dir_hash_ent     *p;
 295
 296         for (i = 0; i < hashtab->size; i++) {
 297                 for (p = hashtab->byhash[i]; p; p = n) {
 298                         n = p->nextbyhash;
 299                         radix_tree_delete(&hashtab->byaddr, p->address);
 300                         free(p);
 301                 }
 302         }
 303         free(hashtab);
 304 }
 305
 306 /*
 307  * Create a directory hash index structure based on the size of the directory we
 308  * are about to try to repair. The size passed in is the size of the data
 309  * segment of the directory in bytes, so we don't really know exactly how many
 310  * entries are in it. Hence assume an entry size of around 64 bytes - that's a
 311  * name length of 40+ bytes so should cover a most situations with really large
 312  * directories.
 313  */
 314 static struct dir_hash_tab *
 315 dir_hash_init(
 316         xfs_fsize_t             size)
 317 {
 318         struct dir_hash_tab     *hashtab = NULL;
 319         int                     hsize;
 320
 321         hsize = size / 64;
 322         if (hsize < 16)
 323                 hsize = 16;
 324
 325         /*
 326          * Try to allocate as large a hash table as possible. Failure to
 327          * allocate isn't fatal, it will just result in slower performance as we
 328          * reduce the size of the table.
 329          */
 330         while (hsize >= 16) {
 331                 hashtab = calloc(DIR_HASH_TAB_SIZE(hsize), 1);
 332                 if (hashtab)
 333                         break;
 334                 hsize /= 2;
 335         }
 336         if (!hashtab)
 337                 do_error(_("calloc failed in dir_hash_init\n"));
 338         hashtab->size = hsize;
 339         hashtab->byhash = (struct dir_hash_ent **)((char *)hashtab +
 340                 sizeof(struct dir_hash_tab));
 341         INIT_RADIX_TREE(&hashtab->byaddr, 0);
 342         return hashtab;
 343 }
 344
 345 static int
 346 dir_hash_see(
 347         struct dir_hash_tab     *hashtab,
 348         xfs_dahash_t            hash,
 349         xfs_dir2_dataptr_t      addr)
 350 {
 351         struct dir_hash_ent     *p;
 352
 353         p = radix_tree_lookup(&hashtab->byaddr, addr);
 354         if (!p)
 355                 return DIR_HASH_CK_NODATA;
 356         if (!radix_tree_tag_get(&hashtab->byaddr, addr, HT_UNSEEN))
 357                 return DIR_HASH_CK_DUPLEAF;
 358         if (p->junkit == 0 && p->hashval != hash)
 359                 return DIR_HASH_CK_BADHASH;
 360         radix_tree_tag_clear(&hashtab->byaddr, addr, HT_UNSEEN);
 361         return DIR_HASH_CK_OK;
 362 }
 363
 364 static void
 365 dir_hash_update_ftype(
 366         struct dir_hash_tab     *hashtab,
 367         xfs_dir2_dataptr_t      addr,
 368         uint8_t                 ftype)
 369 {
 370         struct dir_hash_ent     *p;
 371
 372         p = radix_tree_lookup(&hashtab->byaddr, addr);
 373         if (!p)
 374                 return;
 375         p->name.type = ftype;
 376 }
 377
 378 /*
 379  * checks to make sure leafs match a data entry, and that the stale
 380  * count is valid.
 381  */
 382 static int
 383 dir_hash_see_all(
 384         struct dir_hash_tab     *hashtab,
 385         xfs_dir2_leaf_entry_t   *ents,
 386         int                     count,
 387         int                     stale)
 388 {
 389         int                     i;
 390         int                     j;
 391         int                     rval;
 392
 393         for (i = j = 0; i < count; i++) {
 394                 if (be32_to_cpu(ents[i].address) == XFS_DIR2_NULL_DATAPTR) {
 395                         j++;
 396                         continue;
 397                 }
 398                 rval = dir_hash_see(hashtab, be32_to_cpu(ents[i].hashval),
 399                                         be32_to_cpu(ents[i].address));
 400                 if (rval != DIR_HASH_CK_OK)
 401                         return rval;
 402         }
 403         return j == stale ? DIR_HASH_CK_OK : DIR_HASH_CK_BADSTALE;
 404 }
 405
 406 /*
 407  * Given a block number in a fork, return the next valid block number (not a
 408  * hole).  If this is the last block number then NULLFILEOFF is returned.
 409  */
 410 static int
 411 bmap_next_offset(
 412         struct xfs_inode        *ip,
 413         xfs_fileoff_t           *bnop)
 414 {
 415         xfs_fileoff_t           bno;
 416         int                     error;
 417         struct xfs_bmbt_irec    got;
 418         struct xfs_iext_cursor  icur;
 419
 420         switch (ip->i_df.if_format) {
 421         case XFS_DINODE_FMT_LOCAL:
 422                 *bnop = NULLFILEOFF;
 423                 return 0;
 424         case XFS_DINODE_FMT_BTREE:
 425         case XFS_DINODE_FMT_EXTENTS:
 426                 break;
 427         default:
 428                 return EIO;
 429         }
 430
 431         /* Read extent map. */
 432         error = -libxfs_iread_extents(NULL, ip, XFS_DATA_FORK);
 433         if (error)
 434                 return error;
 435
 436         bno = *bnop + 1;
 437         if (!libxfs_iext_lookup_extent(ip, &ip->i_df, bno, &icur, &got))
 438                 *bnop = NULLFILEOFF;
 439         else
 440                 *bnop = got.br_startoff < bno ? bno : got.br_startoff;
 441         return 0;
 442 }
 443
 444 static void
 445 res_failed(
 446         int     err)
 447 {
 448         if (err == ENOSPC) {
 449                 do_error(_("ran out of disk space!\n"));
 450         } else
 451                 do_error(_("xfs_trans_reserve returned %d\n"), err);
 452 }
 453
 454 static inline void
 455 reset_inode_fields(struct xfs_inode *ip)
 456 {
 457         ip->i_projid = 0;
 458         ip->i_disk_size = 0;
 459         ip->i_nblocks = 0;
 460         ip->i_extsize = 0;
 461         ip->i_cowextsize = 0;
 462         ip->i_flushiter = 0;
 463         ip->i_forkoff = 0;
 464         ip->i_diflags = 0;
 465         ip->i_diflags2 = 0;
 466         ip->i_crtime.tv_sec = 0;
 467         ip->i_crtime.tv_nsec = 0;
 468 }
 469
 470 static void
 471 mk_rbmino(xfs_mount_t *mp)
 472 {
 473         xfs_trans_t     *tp;
 474         xfs_inode_t     *ip;
 475         xfs_bmbt_irec_t *ep;
 476         int             i;
 477         int             nmap;
 478         int             error;
 479         xfs_fileoff_t   bno;
 480         xfs_bmbt_irec_t map[XFS_BMAP_MAX_NMAP];
 481         int             times;
 482         uint            blocks;
 483
 484         /*
 485          * first set up inode
 486          */
 487         i = -libxfs_trans_alloc_rollable(mp, 10, &tp);
 488         if (i)
 489                 res_failed(i);
 490
 491         error = -libxfs_iget(mp, tp, mp->m_sb.sb_rbmino, 0, &ip);
 492         if (error) {
 493                 do_error(
 494                 _("couldn't iget realtime bitmap inode -- error - %d\n"),
 495                         error);
 496         }
 497
 498         reset_inode_fields(ip);
 499
 500         VFS_I(ip)->i_mode = S_IFREG;
 501         ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
 502         if (ip->i_afp)
 503                 ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
 504
 505         set_nlink(VFS_I(ip), 1);        /* account for sb ptr */
 506
 507         times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
 508         if (xfs_has_v3inodes(mp)) {
 509                 VFS_I(ip)->i_version = 1;
 510                 ip->i_diflags2 = 0;
 511                 times |= XFS_ICHGTIME_CREATE;
 512         }
 513         libxfs_trans_ichgtime(tp, ip, times);
 514
 515         /*
 516          * now the ifork
 517          */
 518         ip->i_df.if_bytes = 0;
 519         ip->i_df.if_u1.if_root = NULL;
 520
 521         ip->i_disk_size = mp->m_sb.sb_rbmblocks * mp->m_sb.sb_blocksize;
 522
 523         /*
 524          * commit changes
 525          */
 526         libxfs_trans_ijoin(tp, ip, 0);
 527         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 528         error = -libxfs_trans_commit(tp);
 529         if (error)
 530                 do_error(_("%s: commit failed, error %d\n"), __func__, error);
 531
 532         /*
 533          * then allocate blocks for file and fill with zeroes (stolen
 534          * from mkfs)
 535          */
 536         blocks = mp->m_sb.sb_rbmblocks +
 537                         XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1;
 538         error = -libxfs_trans_alloc_rollable(mp, blocks, &tp);
 539         if (error)
 540                 res_failed(error);
 541
 542         libxfs_trans_ijoin(tp, ip, 0);
 543         bno = 0;
 544         while (bno < mp->m_sb.sb_rbmblocks) {
 545                 nmap = XFS_BMAP_MAX_NMAP;
 546                 error = -libxfs_bmapi_write(tp, ip, bno,
 547                           (xfs_extlen_t)(mp->m_sb.sb_rbmblocks - bno),
 548                           0, mp->m_sb.sb_rbmblocks, map, &nmap);
 549                 if (error) {
 550                         do_error(
 551                         _("couldn't allocate realtime bitmap, error = %d\n"),
 552                                 error);
 553                 }
 554                 for (i = 0, ep = map; i < nmap; i++, ep++) {
 555                         libxfs_device_zero(mp->m_ddev_targp,
 556                                 XFS_FSB_TO_DADDR(mp, ep->br_startblock),
 557                                 XFS_FSB_TO_BB(mp, ep->br_blockcount));
 558                         bno += ep->br_blockcount;
 559                 }
 560         }
 561         error = -libxfs_trans_commit(tp);
 562         if (error) {
 563                 do_error(
 564                 _("allocation of the realtime bitmap failed, error = %d\n"),
 565                         error);
 566         }
 567         libxfs_irele(ip);
 568 }
 569
 570 static int
 571 fill_rbmino(xfs_mount_t *mp)
 572 {
 573         struct xfs_buf  *bp;
 574         xfs_trans_t     *tp;
 575         xfs_inode_t     *ip;
 576         xfs_rtword_t    *bmp;
 577         int             nmap;
 578         int             error;
 579         xfs_fileoff_t   bno;
 580         xfs_bmbt_irec_t map;
 581
 582         bmp = btmcompute;
 583         bno = 0;
 584
 585         error = -libxfs_trans_alloc_rollable(mp, 10, &tp);
 586         if (error)
 587                 res_failed(error);
 588
 589         error = -libxfs_iget(mp, tp, mp->m_sb.sb_rbmino, 0, &ip);
 590         if (error) {
 591                 do_error(
 592                 _("couldn't iget realtime bitmap inode -- error - %d\n"),
 593                         error);
 594         }
 595
 596         while (bno < mp->m_sb.sb_rbmblocks)  {
 597                 /*
 598                  * fill the file one block at a time
 599                  */
 600                 nmap = 1;
 601                 error = -libxfs_bmapi_write(tp, ip, bno, 1, 0, 1, &map, &nmap);
 602                 if (error || nmap != 1) {
 603                         do_error(
 604         _("couldn't map realtime bitmap block %" PRIu64 ", error = %d\n"),
 605                                 bno, error);
 606                 }
 607
 608                 ASSERT(map.br_startblock != HOLESTARTBLOCK);
 609
 610                 error = -libxfs_trans_read_buf(
 611                                 mp, tp, mp->m_dev,
 612                                 XFS_FSB_TO_DADDR(mp, map.br_startblock),
 613                                 XFS_FSB_TO_BB(mp, 1), 1, &bp, NULL);
 614
 615                 if (error) {
 616                         do_warn(
 617 _("can't access block %" PRIu64 " (fsbno %" PRIu64 ") of realtime bitmap inode %" PRIu64 "\n"),
 618                                 bno, map.br_startblock, mp->m_sb.sb_rbmino);
 619                         return(1);
 620                 }
 621
 622                 memmove(bp->b_addr, bmp, mp->m_sb.sb_blocksize);
 623
 624                 libxfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
 625
 626                 bmp = (xfs_rtword_t *)((intptr_t) bmp + mp->m_sb.sb_blocksize);
 627                 bno++;
 628         }
 629
 630         libxfs_trans_ijoin(tp, ip, 0);
 631         error = -libxfs_trans_commit(tp);
 632         if (error)
 633                 do_error(_("%s: commit failed, error %d\n"), __func__, error);
 634         libxfs_irele(ip);
 635         return(0);
 636 }
 637
 638 static int
 639 fill_rsumino(xfs_mount_t *mp)
 640 {
 641         struct xfs_buf  *bp;
 642         xfs_trans_t     *tp;
 643         xfs_inode_t     *ip;
 644         xfs_suminfo_t   *smp;
 645         int             nmap;
 646         int             error;
 647         xfs_fileoff_t   bno;
 648         xfs_fileoff_t   end_bno;
 649         xfs_bmbt_irec_t map;
 650
 651         smp = sumcompute;
 652         bno = 0;
 653         end_bno = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
 654
 655         error = -libxfs_trans_alloc_rollable(mp, 10, &tp);
 656         if (error)
 657                 res_failed(error);
 658
 659         error = -libxfs_iget(mp, tp, mp->m_sb.sb_rsumino, 0, &ip);
 660         if (error) {
 661                 do_error(
 662                 _("couldn't iget realtime summary inode -- error - %d\n"),
 663                         error);
 664         }
 665
 666         while (bno < end_bno)  {
 667                 /*
 668                  * fill the file one block at a time
 669                  */
 670                 nmap = 1;
 671                 error = -libxfs_bmapi_write(tp, ip, bno, 1, 0, 1, &map, &nmap);
 672                 if (error || nmap != 1) {
 673                         do_error(
 674         _("couldn't map realtime summary inode block %" PRIu64 ", error = %d\n"),
 675                                 bno, error);
 676                 }
 677
 678                 ASSERT(map.br_startblock != HOLESTARTBLOCK);
 679
 680                 error = -libxfs_trans_read_buf(
 681                                 mp, tp, mp->m_dev,
 682                                 XFS_FSB_TO_DADDR(mp, map.br_startblock),
 683                                 XFS_FSB_TO_BB(mp, 1), 1, &bp, NULL);
 684
 685                 if (error) {
 686                         do_warn(
 687 _("can't access block %" PRIu64 " (fsbno %" PRIu64 ") of realtime summary inode %" PRIu64 "\n"),
 688                                 bno, map.br_startblock, mp->m_sb.sb_rsumino);
 689                         libxfs_irele(ip);
 690                         return(1);
 691                 }
 692
 693                 memmove(bp->b_addr, smp, mp->m_sb.sb_blocksize);
 694
 695                 libxfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
 696
 697                 smp = (xfs_suminfo_t *)((intptr_t)smp + mp->m_sb.sb_blocksize);
 698                 bno++;
 699         }
 700
 701         libxfs_trans_ijoin(tp, ip, 0);
 702         error = -libxfs_trans_commit(tp);
 703         if (error)
 704                 do_error(_("%s: commit failed, error %d\n"), __func__, error);
 705         libxfs_irele(ip);
 706         return(0);
 707 }
 708
 709 static void
 710 mk_rsumino(xfs_mount_t *mp)
 711 {
 712         xfs_trans_t     *tp;
 713         xfs_inode_t     *ip;
 714         xfs_bmbt_irec_t *ep;
 715         int             i;
 716         int             nmap;
 717         int             error;
 718         int             nsumblocks;
 719         xfs_fileoff_t   bno;
 720         xfs_bmbt_irec_t map[XFS_BMAP_MAX_NMAP];
 721         int             times;
 722         uint            blocks;
 723
 724         /*
 725          * first set up inode
 726          */
 727         i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 10, 0, 0, &tp);
 728         if (i)
 729                 res_failed(i);
 730
 731         error = -libxfs_iget(mp, tp, mp->m_sb.sb_rsumino, 0, &ip);
 732         if (error) {
 733                 do_error(
 734                 _("couldn't iget realtime summary inode -- error - %d\n"),
 735                         error);
 736         }
 737
 738         reset_inode_fields(ip);
 739
 740         VFS_I(ip)->i_mode = S_IFREG;
 741         ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
 742         if (ip->i_afp)
 743                 ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
 744
 745         set_nlink(VFS_I(ip), 1);        /* account for sb ptr */
 746
 747         times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
 748         if (xfs_has_v3inodes(mp)) {
 749                 VFS_I(ip)->i_version = 1;
 750                 ip->i_diflags2 = 0;
 751                 times |= XFS_ICHGTIME_CREATE;
 752         }
 753         libxfs_trans_ichgtime(tp, ip, times);
 754
 755         /*
 756          * now the ifork
 757          */
 758         ip->i_df.if_bytes = 0;
 759         ip->i_df.if_u1.if_root = NULL;
 760
 761         ip->i_disk_size = mp->m_rsumsize;
 762
 763         /*
 764          * commit changes
 765          */
 766         libxfs_trans_ijoin(tp, ip, 0);
 767         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 768         error = -libxfs_trans_commit(tp);
 769         if (error)
 770                 do_error(_("%s: commit failed, error %d\n"), __func__, error);
 771
 772         /*
 773          * then allocate blocks for file and fill with zeroes (stolen
 774          * from mkfs)
 775          */
 776         nsumblocks = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
 777         blocks = nsumblocks + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1;
 778         error = -libxfs_trans_alloc_rollable(mp, blocks, &tp);
 779         if (error)
 780                 res_failed(error);
 781
 782         libxfs_trans_ijoin(tp, ip, 0);
 783         bno = 0;
 784         while (bno < nsumblocks) {
 785                 nmap = XFS_BMAP_MAX_NMAP;
 786                 error = -libxfs_bmapi_write(tp, ip, bno,
 787                           (xfs_extlen_t)(nsumblocks - bno),
 788                           0, nsumblocks, map, &nmap);
 789                 if (error) {
 790                         do_error(
 791                 _("couldn't allocate realtime summary inode, error = %d\n"),
 792                                 error);
 793                 }
 794                 for (i = 0, ep = map; i < nmap; i++, ep++) {
 795                         libxfs_device_zero(mp->m_ddev_targp,
 796                                       XFS_FSB_TO_DADDR(mp, ep->br_startblock),
 797                                       XFS_FSB_TO_BB(mp, ep->br_blockcount));
 798                         bno += ep->br_blockcount;
 799                 }
 800         }
 801         error = -libxfs_trans_commit(tp);
 802         if (error) {
 803                 do_error(
 804         _("allocation of the realtime summary ino failed, error = %d\n"),
 805                         error);
 806         }
 807         libxfs_irele(ip);
 808 }
 809
 810 /*
 811  * makes a new root directory.
 812  */
 813 static void
 814 mk_root_dir(xfs_mount_t *mp)
 815 {
 816         xfs_trans_t     *tp;
 817         xfs_inode_t     *ip;
 818         int             i;
 819         int             error;
 820         const mode_t    mode = 0755;
 821         ino_tree_node_t *irec;
 822         int             times;
 823
 824         ip = NULL;
 825         i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 10, 0, 0, &tp);
 826         if (i)
 827                 res_failed(i);
 828
 829         error = -libxfs_iget(mp, tp, mp->m_sb.sb_rootino, 0, &ip);
 830         if (error) {
 831                 do_error(_("could not iget root inode -- error - %d\n"), error);
 832         }
 833
 834         /*
 835          * take care of the core -- initialization from xfs_ialloc()
 836          */
 837         reset_inode_fields(ip);
 838
 839         VFS_I(ip)->i_mode = mode|S_IFDIR;
 840         ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
 841         if (ip->i_afp)
 842                 ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
 843
 844         set_nlink(VFS_I(ip), 2);        /* account for . and .. */
 845
 846         times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
 847         if (xfs_has_v3inodes(mp)) {
 848                 VFS_I(ip)->i_version = 1;
 849                 ip->i_diflags2 = 0;
 850                 times |= XFS_ICHGTIME_CREATE;
 851         }
 852         libxfs_trans_ichgtime(tp, ip, times);
 853         libxfs_trans_ijoin(tp, ip, 0);
 854         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 855
 856         /*
 857          * now the ifork
 858          */
 859         ip->i_df.if_bytes = 0;
 860         ip->i_df.if_u1.if_root = NULL;
 861
 862         /*
 863          * initialize the directory
 864          */
 865         libxfs_dir_init(tp, ip, ip);
 866
 867         error = -libxfs_trans_commit(tp);
 868         if (error)
 869                 do_error(_("%s: commit failed, error %d\n"), __func__, error);
 870
 871         libxfs_irele(ip);
 872
 873         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
 874                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
 875         set_inode_isadir(irec, XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino) -
 876                                 irec->ino_startnum);
 877 }
 878
 879 /*
 880  * orphanage name == lost+found
 881  */
 882 static xfs_ino_t
 883 mk_orphanage(xfs_mount_t *mp)
 884 {
 885         xfs_ino_t       ino;
 886         xfs_trans_t     *tp;
 887         xfs_inode_t     *ip;
 888         xfs_inode_t     *pip;
 889         ino_tree_node_t *irec;
 890         int             ino_offset = 0;
 891         int             i;
 892         int             error;
 893         const int       mode = 0755;
 894         int             nres;
 895         struct xfs_name xname;
 896
 897         /*
 898          * check for an existing lost+found first, if it exists, return
 899          * its inode. Otherwise, we can create it. Bad lost+found inodes
 900          * would have been cleared in phase3 and phase4.
 901          */
 902
 903         i = -libxfs_iget(mp, NULL, mp->m_sb.sb_rootino, 0, &pip);
 904         if (i)
 905                 do_error(_("%d - couldn't iget root inode to obtain %s\n"),
 906                         i, ORPHANAGE);
 907
 908         xname.name = (unsigned char *)ORPHANAGE;
 909         xname.len = strlen(ORPHANAGE);
 910         xname.type = XFS_DIR3_FT_DIR;
 911
 912         if (libxfs_dir_lookup(NULL, pip, &xname, &ino, NULL) == 0)
 913                 return ino;
 914
 915         /*
 916          * could not be found, create it
 917          */
 918         nres = XFS_MKDIR_SPACE_RES(mp, xname.len);
 919         i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir, nres, 0, 0, &tp);
 920         if (i)
 921                 res_failed(i);
 922
 923         /*
 924          * use iget/ijoin instead of trans_iget because the ialloc
 925          * wrapper can commit the transaction and start a new one
 926          */
 927 /*      i = -libxfs_iget(mp, NULL, mp->m_sb.sb_rootino, 0, &pip);
 928         if (i)
 929                 do_error(_("%d - couldn't iget root inode to make %s\n"),
 930                         i, ORPHANAGE);*/
 931
 932         error = -libxfs_dir_ialloc(&tp, pip, mode|S_IFDIR,
 933                                         1, 0, &zerocr, &zerofsx, &ip);
 934         if (error) {
 935                 do_error(_("%s inode allocation failed %d\n"),
 936                         ORPHANAGE, error);
 937         }
 938         inc_nlink(VFS_I(ip));           /* account for . */
 939         ino = ip->i_ino;
 940
 941         irec = find_inode_rec(mp,
 942                         XFS_INO_TO_AGNO(mp, ino),
 943                         XFS_INO_TO_AGINO(mp, ino));
 944
 945         if (irec == NULL) {
 946                 /*
 947                  * This inode is allocated from a newly created inode
 948                  * chunk and therefore did not exist when inode chunks
 949                  * were processed in phase3. Add this group of inodes to
 950                  * the entry avl tree as if they were discovered in phase3.
 951                  */
 952                 irec = set_inode_free_alloc(mp, XFS_INO_TO_AGNO(mp, ino),
 953                                             XFS_INO_TO_AGINO(mp, ino));
 954                 alloc_ex_data(irec);
 955
 956                 for (i = 0; i < XFS_INODES_PER_CHUNK; i++)
 957                         set_inode_free(irec, i);
 958         }
 959
 960         ino_offset = get_inode_offset(mp, ino, irec);
 961
 962         /*
 963          * Mark the inode allocated to lost+found as used in the AVL tree
 964          * so it is not skipped in phase 7
 965          */
 966         set_inode_used(irec, ino_offset);
 967         add_inode_ref(irec, ino_offset);
 968         add_inode_reached(irec, ino_offset);
 969
 970         /*
 971          * now that we know the transaction will stay around,
 972          * add the root inode to it
 973          */
 974         libxfs_trans_ijoin(tp, pip, 0);
 975
 976         /*
 977          * create the actual entry
 978          */
 979         error = -libxfs_dir_createname(tp, pip, &xname, ip->i_ino, nres);
 980         if (error)
 981                 do_error(
 982                 _("can't make %s, createname error %d\n"),
 983                         ORPHANAGE, error);
 984
 985         /*
 986          * bump up the link count in the root directory to account
 987          * for .. in the new directory, and update the irec copy of the
 988          * on-disk nlink so we don't fail the link count check later.
 989          */
 990         inc_nlink(VFS_I(pip));
 991         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
 992                                   XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
 993         add_inode_ref(irec, 0);
 994         set_inode_disk_nlinks(irec, 0, get_inode_disk_nlinks(irec, 0) + 1);
 995
 996         libxfs_trans_log_inode(tp, pip, XFS_ILOG_CORE);
 997         libxfs_dir_init(tp, ip, pip);
 998         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 999         error = -libxfs_trans_commit(tp);
1000         if (error) {
1001                 do_error(_("%s directory creation failed -- bmapf error %d\n"),
1002                         ORPHANAGE, error);
1003         }
1004         libxfs_irele(ip);
1005         libxfs_irele(pip);
1006
1007         return(ino);
1008 }
1009
1010 /*
1011  * move a file to the orphange.
1012  */
1013 static void
1014 mv_orphanage(
1015         xfs_mount_t             *mp,
1016         xfs_ino_t               ino,            /* inode # to be moved */
1017         int                     isa_dir)        /* 1 if inode is a directory */
1018 {
1019         xfs_inode_t             *orphanage_ip;
1020         xfs_ino_t               entry_ino_num;
1021         xfs_inode_t             *ino_p;
1022         xfs_trans_t             *tp;
1023         int                     err;
1024         unsigned char           fname[MAXPATHLEN + 1];
1025         int                     nres;
1026         int                     incr;
1027         ino_tree_node_t         *irec;
1028         int                     ino_offset = 0;
1029         struct xfs_name         xname;
1030
1031         xname.name = fname;
1032         xname.len = snprintf((char *)fname, sizeof(fname), "%llu",
1033                                 (unsigned long long)ino);
1034
1035         err = -libxfs_iget(mp, NULL, orphanage_ino, 0, &orphanage_ip);
1036         if (err)
1037                 do_error(_("%d - couldn't iget orphanage inode\n"), err);
1038         /*
1039          * Make sure the filename is unique in the lost+found
1040          */
1041         incr = 0;
1042         while (libxfs_dir_lookup(NULL, orphanage_ip, &xname, &entry_ino_num,
1043                                                                 NULL) == 0)
1044                 xname.len = snprintf((char *)fname, sizeof(fname), "%llu.%d",
1045                                         (unsigned long long)ino, ++incr);
1046
1047         /* Orphans may not have a proper parent, so use custom ops here */
1048         err = -libxfs_iget(mp, NULL, ino, 0, &ino_p);
1049         if (err)
1050                 do_error(_("%d - couldn't iget disconnected inode\n"), err);
1051
1052         xname.type = libxfs_mode_to_ftype(VFS_I(ino_p)->i_mode);
1053
1054         if (isa_dir)  {
1055                 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, orphanage_ino),
1056                                 XFS_INO_TO_AGINO(mp, orphanage_ino));
1057                 if (irec)
1058                         ino_offset = XFS_INO_TO_AGINO(mp, orphanage_ino) -
1059                                         irec->ino_startnum;
1060                 nres = XFS_DIRENTER_SPACE_RES(mp, fnamelen) +
1061                        XFS_DIRENTER_SPACE_RES(mp, 2);
1062                 err = -libxfs_dir_lookup(NULL, ino_p, &xfs_name_dotdot,
1063                                         &entry_ino_num, NULL);
1064                 if (err) {
1065                         ASSERT(err == ENOENT);
1066
1067                         err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_rename,
1068                                                   nres, 0, 0, &tp);
1069                         if (err)
1070                                 res_failed(err);
1071
1072                         libxfs_trans_ijoin(tp, orphanage_ip, 0);
1073                         libxfs_trans_ijoin(tp, ino_p, 0);
1074
1075                         err = -libxfs_dir_createname(tp, orphanage_ip, &xname,
1076                                                 ino, nres);
1077                         if (err)
1078                                 do_error(
1079         _("name create failed in %s (%d)\n"), ORPHANAGE, err);
1080
1081                         if (irec)
1082                                 add_inode_ref(irec, ino_offset);
1083                         else
1084                                 inc_nlink(VFS_I(orphanage_ip));
1085                         libxfs_trans_log_inode(tp, orphanage_ip, XFS_ILOG_CORE);
1086
1087                         err = -libxfs_dir_createname(tp, ino_p, &xfs_name_dotdot,
1088                                         orphanage_ino, nres);
1089                         if (err)
1090                                 do_error(
1091         _("creation of .. entry failed (%d)\n"), err);
1092
1093                         inc_nlink(VFS_I(ino_p));
1094                         libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE);
1095                         err = -libxfs_trans_commit(tp);
1096                         if (err)
1097                                 do_error(
1098         _("creation of .. entry failed (%d)\n"), err);
1099                 } else  {
1100                         err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_rename,
1101                                                   nres, 0, 0, &tp);
1102                         if (err)
1103                                 res_failed(err);
1104
1105                         libxfs_trans_ijoin(tp, orphanage_ip, 0);
1106                         libxfs_trans_ijoin(tp, ino_p, 0);
1107
1108
1109                         err = -libxfs_dir_createname(tp, orphanage_ip, &xname,
1110                                                 ino, nres);
1111                         if (err)
1112                                 do_error(
1113         _("name create failed in %s (%d)\n"), ORPHANAGE, err);
1114
1115                         if (irec)
1116                                 add_inode_ref(irec, ino_offset);
1117                         else
1118                                 inc_nlink(VFS_I(orphanage_ip));
1119                         libxfs_trans_log_inode(tp, orphanage_ip, XFS_ILOG_CORE);
1120
1121                         /*
1122                          * don't replace .. value if it already points
1123                          * to us.  that'll pop a libxfs/kernel ASSERT.
1124                          */
1125                         if (entry_ino_num != orphanage_ino)  {
1126                                 err = -libxfs_dir_replace(tp, ino_p,
1127                                                 &xfs_name_dotdot, orphanage_ino,
1128                                                 nres);
1129                                 if (err)
1130                                         do_error(
1131         _("name replace op failed (%d)\n"), err);
1132                         }
1133
1134                         err = -libxfs_trans_commit(tp);
1135                         if (err)
1136                                 do_error(
1137         _("orphanage name replace op failed (%d)\n"), err);
1138                 }
1139
1140         } else  {
1141                 /*
1142                  * use the remove log reservation as that's
1143                  * more accurate.  we're only creating the
1144                  * links, we're not doing the inode allocation
1145                  * also accounted for in the create
1146                  */
1147                 nres = XFS_DIRENTER_SPACE_RES(mp, xname.len);
1148                 err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove,
1149                                           nres, 0, 0, &tp);
1150                 if (err)
1151                         res_failed(err);
1152
1153                 libxfs_trans_ijoin(tp, orphanage_ip, 0);
1154                 libxfs_trans_ijoin(tp, ino_p, 0);
1155
1156                 err = -libxfs_dir_createname(tp, orphanage_ip, &xname, ino,
1157                                                 nres);
1158                 if (err)
1159                         do_error(
1160         _("name create failed in %s (%d)\n"), ORPHANAGE, err);
1161                 ASSERT(err == 0);
1162
1163                 set_nlink(VFS_I(ino_p), 1);
1164                 libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE);
1165                 err = -libxfs_trans_commit(tp);
1166                 if (err)
1167                         do_error(
1168         _("orphanage name create failed (%d)\n"), err);
1169         }
1170         libxfs_irele(ino_p);
1171         libxfs_irele(orphanage_ip);
1172 }
1173
1174 static int
1175 entry_junked(
1176         const char      *msg,
1177         const char      *iname,
1178         xfs_ino_t       ino1,
1179         xfs_ino_t       ino2)
1180 {
1181         do_warn(msg, iname, ino1, ino2);
1182         if (!no_modify) {
1183                 if (verbose)
1184                         do_warn(_(", marking entry to be junked\n"));
1185                 else
1186                         do_warn("\n");
1187         } else
1188                 do_warn(_(", would junk entry\n"));
1189         return !no_modify;
1190 }
1191
1192 /* Find and invalidate all the directory's buffers. */
1193 static int
1194 dir_binval(
1195         struct xfs_trans        *tp,
1196         struct xfs_inode        *ip,
1197         int                     whichfork)
1198 {
1199         struct xfs_iext_cursor  icur;
1200         struct xfs_bmbt_irec    rec;
1201         struct xfs_ifork        *ifp;
1202         struct xfs_da_geometry  *geo;
1203         struct xfs_buf          *bp;
1204         xfs_dablk_t             dabno;
1205         int                     error = 0;
1206
1207         if (ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
1208             ip->i_df.if_format != XFS_DINODE_FMT_BTREE)
1209                 return 0;
1210
1211         geo = tp->t_mountp->m_dir_geo;
1212         ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1213         for_each_xfs_iext(ifp, &icur, &rec) {
1214                 for (dabno = roundup(rec.br_startoff, geo->fsbcount);
1215                      dabno < rec.br_startoff + rec.br_blockcount;
1216                      dabno += geo->fsbcount) {
1217                         bp = NULL;
1218                         error = -libxfs_da_get_buf(tp, ip, dabno, &bp,
1219                                         whichfork);
1220                         if (error)
1221                                 return error;
1222                         if (!bp)
1223                                 continue;
1224                         libxfs_trans_binval(tp, bp);
1225                         libxfs_trans_brelse(tp, bp);
1226                 }
1227         }
1228
1229         return error;
1230 }
1231
1232 /*
1233  * Unexpected failure during the rebuild will leave the entries in
1234  * lost+found on the next run
1235  */
1236
1237 static void
1238 longform_dir2_rebuild(
1239         struct xfs_mount        *mp,
1240         xfs_ino_t               ino,
1241         struct xfs_inode        *ip,
1242         struct ino_tree_node    *irec,
1243         int                     ino_offset,
1244         struct dir_hash_tab     *hashtab)
1245 {
1246         int                     error;
1247         int                     nres;
1248         struct xfs_trans        *tp;
1249         xfs_fileoff_t           lastblock;
1250         struct xfs_inode        pip;
1251         struct dir_hash_ent     *p;
1252         int                     done = 0;
1253
1254         /*
1255          * trash directory completely and rebuild from scratch using the
1256          * name/inode pairs in the hash table
1257          */
1258
1259         do_warn(_("rebuilding directory inode %" PRIu64 "\n"), ino);
1260
1261         /*
1262          * first attempt to locate the parent inode, if it can't be
1263          * found, set it to the root inode and it'll be moved to the
1264          * orphanage later (the inode number here needs to be valid
1265          * for the libxfs_dir_init() call).
1266          */
1267         pip.i_ino = get_inode_parent(irec, ino_offset);
1268         if (pip.i_ino == NULLFSINO ||
1269             libxfs_dir_ino_validate(mp, pip.i_ino))
1270                 pip.i_ino = mp->m_sb.sb_rootino;
1271
1272         nres = XFS_REMOVE_SPACE_RES(mp);
1273         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, nres, 0, 0, &tp);
1274         if (error)
1275                 res_failed(error);
1276         libxfs_trans_ijoin(tp, ip, 0);
1277
1278         error = dir_binval(tp, ip, XFS_DATA_FORK);
1279         if (error)
1280                 do_error(_("error %d invalidating directory %llu blocks\n"),
1281                                 error, (unsigned long long)ip->i_ino);
1282
1283         if ((error = -libxfs_bmap_last_offset(ip, &lastblock, XFS_DATA_FORK)))
1284                 do_error(_("xfs_bmap_last_offset failed -- error - %d\n"),
1285                         error);
1286
1287         /* free all data, leaf, node and freespace blocks */
1288         while (!done) {
1289                error = -libxfs_bunmapi(tp, ip, 0, lastblock, XFS_BMAPI_METADATA,
1290                                        0, &done);
1291                if (error) {
1292                        do_warn(_("xfs_bunmapi failed -- error - %d\n"), error);
1293                        goto out_bmap_cancel;
1294                }
1295                error = -libxfs_defer_finish(&tp);
1296                if (error) {
1297                        do_warn(("defer_finish failed -- error - %d\n"), error);
1298                        goto out_bmap_cancel;
1299                }
1300                /*
1301                 * Close out trans and start the next one in the chain.
1302                 */
1303                error = -libxfs_trans_roll_inode(&tp, ip);
1304                if (error)
1305                         goto out_bmap_cancel;
1306         }
1307
1308         error = -libxfs_dir_init(tp, ip, &pip);
1309         if (error) {
1310                 do_warn(_("xfs_dir_init failed -- error - %d\n"), error);
1311                 goto out_bmap_cancel;
1312         }
1313
1314         error = -libxfs_trans_commit(tp);
1315         if (error)
1316                 do_error(
1317         _("dir init failed (%d)\n"), error);
1318
1319         if (ino == mp->m_sb.sb_rootino)
1320                 need_root_dotdot = 0;
1321
1322         /* go through the hash list and re-add the inodes */
1323
1324         for (p = hashtab->first; p; p = p->nextbyorder) {
1325
1326                 if (p->name.name[0] == '/' || (p->name.name[0] == '.' &&
1327                                 (p->name.len == 1 || (p->name.len == 2 &&
1328                                                 p->name.name[1] == '.'))))
1329                         continue;
1330
1331                 nres = XFS_CREATE_SPACE_RES(mp, p->name.len);
1332                 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_create,
1333                                             nres, 0, 0, &tp);
1334                 if (error)
1335                         res_failed(error);
1336
1337                 libxfs_trans_ijoin(tp, ip, 0);
1338
1339                 error = -libxfs_dir_createname(tp, ip, &p->name, p->inum,
1340                                                 nres);
1341                 if (error) {
1342                         do_warn(
1343 _("name create failed in ino %" PRIu64 " (%d)\n"), ino, error);
1344                         goto out_bmap_cancel;
1345                 }
1346
1347                 error = -libxfs_trans_commit(tp);
1348                 if (error)
1349                         do_error(
1350 _("name create failed (%d) during rebuild\n"), error);
1351         }
1352
1353         return;
1354
1355 out_bmap_cancel:
1356         libxfs_trans_cancel(tp);
1357         return;
1358 }
1359
1360
1361 /*
1362  * Kill a block in a version 2 inode.
1363  * Makes its own transaction.
1364  */
1365 static void
1366 dir2_kill_block(
1367         xfs_mount_t     *mp,
1368         xfs_inode_t     *ip,
1369         xfs_dablk_t     da_bno,
1370         struct xfs_buf  *bp)
1371 {
1372         xfs_da_args_t   args;
1373         int             error;
1374         int             nres;
1375         xfs_trans_t     *tp;
1376
1377         nres = XFS_REMOVE_SPACE_RES(mp);
1378         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, nres, 0, 0, &tp);
1379         if (error)
1380                 res_failed(error);
1381         libxfs_trans_ijoin(tp, ip, 0);
1382         libxfs_trans_bjoin(tp, bp);
1383         libxfs_trans_bhold(tp, bp);
1384         memset(&args, 0, sizeof(args));
1385         args.dp = ip;
1386         args.trans = tp;
1387         args.whichfork = XFS_DATA_FORK;
1388         args.geo = mp->m_dir_geo;
1389         if (da_bno >= mp->m_dir_geo->leafblk && da_bno < mp->m_dir_geo->freeblk)
1390                 error = -libxfs_da_shrink_inode(&args, da_bno, bp);
1391         else
1392                 error = -libxfs_dir2_shrink_inode(&args,
1393                                 xfs_dir2_da_to_db(mp->m_dir_geo, da_bno), bp);
1394         if (error)
1395                 do_error(_("shrink_inode failed inode %" PRIu64 " block %u\n"),
1396                         ip->i_ino, da_bno);
1397         error = -libxfs_trans_commit(tp);
1398         if (error)
1399                 do_error(
1400 _("directory shrink failed (%d)\n"), error);
1401 }
1402
1403 static inline void
1404 check_longform_ftype(
1405         struct xfs_mount        *mp,
1406         struct xfs_inode        *ip,
1407         xfs_dir2_data_entry_t   *dep,
1408         ino_tree_node_t         *irec,
1409         int                     ino_offset,
1410         struct dir_hash_tab     *hashtab,
1411         xfs_dir2_dataptr_t      addr,
1412         struct xfs_da_args      *da,
1413         struct xfs_buf          *bp)
1414 {
1415         xfs_ino_t               inum = be64_to_cpu(dep->inumber);
1416         uint8_t                 dir_ftype;
1417         uint8_t                 ino_ftype;
1418
1419         if (!xfs_has_ftype(mp))
1420                 return;
1421
1422         dir_ftype = libxfs_dir2_data_get_ftype(mp, dep);
1423         ino_ftype = get_inode_ftype(irec, ino_offset);
1424
1425         if (dir_ftype == ino_ftype)
1426                 return;
1427
1428         if (no_modify) {
1429                 do_warn(
1430 _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
1431                         dir_ftype, ino_ftype,
1432                         ip->i_ino, inum);
1433                 return;
1434         }
1435
1436         do_warn(
1437 _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
1438                 dir_ftype, ino_ftype,
1439                 ip->i_ino, inum);
1440         libxfs_dir2_data_put_ftype(mp, dep, ino_ftype);
1441         libxfs_dir2_data_log_entry(da, bp, dep);
1442         dir_hash_update_ftype(hashtab, addr, ino_ftype);
1443 }
1444
1445 /*
1446  * process a data block, also checks for .. entry
1447  * and corrects it to match what we think .. should be
1448  */
1449 static void
1450 longform_dir2_entry_check_data(
1451         struct xfs_mount        *mp,
1452         struct xfs_inode        *ip,
1453         int                     *num_illegal,
1454         int                     *need_dot,
1455         struct ino_tree_node    *current_irec,
1456         int                     current_ino_offset,
1457         struct xfs_buf          *bp,
1458         struct dir_hash_tab     *hashtab,
1459         freetab_t               **freetabp,
1460         xfs_dablk_t             da_bno,
1461         int                     isblock)
1462 {
1463         xfs_dir2_dataptr_t      addr;
1464         xfs_dir2_leaf_entry_t   *blp;
1465         xfs_dir2_block_tail_t   *btp;
1466         struct xfs_dir2_data_hdr *d;
1467         xfs_dir2_db_t           db;
1468         xfs_dir2_data_entry_t   *dep;
1469         xfs_dir2_data_unused_t  *dup;
1470         struct xfs_dir2_data_free *bf;
1471         char                    *endptr;
1472         int                     error;
1473         char                    fname[MAXNAMELEN + 1];
1474         freetab_t               *freetab;
1475         int                     i;
1476         int                     ino_offset;
1477         xfs_ino_t               inum;
1478         ino_tree_node_t         *irec;
1479         int                     junkit;
1480         int                     lastfree;
1481         int                     len;
1482         int                     nbad;
1483         int                     needlog;
1484         int                     needscan;
1485         xfs_ino_t               parent;
1486         char                    *ptr;
1487         xfs_trans_t             *tp;
1488         int                     wantmagic;
1489         struct xfs_da_args      da = {
1490                 .dp = ip,
1491                 .geo = mp->m_dir_geo,
1492         };
1493
1494
1495         d = bp->b_addr;
1496         ptr = (char *)d + mp->m_dir_geo->data_entry_offset;
1497         nbad = 0;
1498         needscan = needlog = 0;
1499         junkit = 0;
1500         freetab = *freetabp;
1501         if (isblock) {
1502                 btp = xfs_dir2_block_tail_p(mp->m_dir_geo, d);
1503                 blp = xfs_dir2_block_leaf_p(btp);
1504                 endptr = (char *)blp;
1505                 if (endptr > (char *)btp)
1506                         endptr = (char *)btp;
1507                 if (xfs_has_crc(mp))
1508                         wantmagic = XFS_DIR3_BLOCK_MAGIC;
1509                 else
1510                         wantmagic = XFS_DIR2_BLOCK_MAGIC;
1511         } else {
1512                 endptr = (char *)d + mp->m_dir_geo->blksize;
1513                 if (xfs_has_crc(mp))
1514                         wantmagic = XFS_DIR3_DATA_MAGIC;
1515                 else
1516                         wantmagic = XFS_DIR2_DATA_MAGIC;
1517         }
1518         db = xfs_dir2_da_to_db(mp->m_dir_geo, da_bno);
1519
1520         /* check for data block beyond expected end */
1521         if (freetab->naents <= db) {
1522                 struct freetab_ent e;
1523
1524                 *freetabp = freetab = realloc(freetab, FREETAB_SIZE(db + 1));
1525                 if (!freetab) {
1526                         do_error(_("realloc failed in %s (%zu bytes)\n"),
1527                                 __func__, FREETAB_SIZE(db + 1));
1528                 }
1529                 e.v = NULLDATAOFF;
1530                 e.s = 0;
1531                 for (i = freetab->naents; i < db; i++)
1532                         freetab->ents[i] = e;
1533                 freetab->naents = db + 1;
1534         }
1535
1536         /* check the data block */
1537         while (ptr < endptr) {
1538
1539                 /* check for freespace */
1540                 dup = (xfs_dir2_data_unused_t *)ptr;
1541                 if (XFS_DIR2_DATA_FREE_TAG == be16_to_cpu(dup->freetag)) {
1542
1543                         /* check for invalid freespace length */
1544                         if (ptr + be16_to_cpu(dup->length) > endptr ||
1545                                         be16_to_cpu(dup->length) == 0 ||
1546                                         (be16_to_cpu(dup->length) &
1547                                                 (XFS_DIR2_DATA_ALIGN - 1)))
1548                                 break;
1549
1550                         /* check for invalid tag */
1551                         if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) !=
1552                                                 (char *)dup - (char *)d)
1553                                 break;
1554
1555                         /* check for block with no data entries */
1556                         if ((ptr == (char *)d + mp->m_dir_geo->data_entry_offset) &&
1557                             (ptr + be16_to_cpu(dup->length) >= endptr)) {
1558                                 junkit = 1;
1559                                 *num_illegal += 1;
1560                                 break;
1561                         }
1562
1563                         /* continue at the end of the freespace */
1564                         ptr += be16_to_cpu(dup->length);
1565                         if (ptr >= endptr)
1566                                 break;
1567                 }
1568
1569                 /* validate data entry size */
1570                 dep = (xfs_dir2_data_entry_t *)ptr;
1571                 if (ptr + libxfs_dir2_data_entsize(mp, dep->namelen) > endptr)
1572                         break;
1573                 if (be16_to_cpu(*libxfs_dir2_data_entry_tag_p(mp, dep)) !=
1574                                                 (char *)dep - (char *)d)
1575                         break;
1576                 ptr += libxfs_dir2_data_entsize(mp, dep->namelen);
1577         }
1578
1579         /* did we find an empty or corrupt block? */
1580         if (ptr != endptr) {
1581                 if (junkit) {
1582                         do_warn(
1583         _("empty data block %u in directory inode %" PRIu64 ": "),
1584                                 da_bno, ip->i_ino);
1585                 } else {
1586                         do_warn(_
1587         ("corrupt block %u in directory inode %" PRIu64 ": "),
1588                                 da_bno, ip->i_ino);
1589                 }
1590                 if (!no_modify) {
1591                         do_warn(_("junking block\n"));
1592                         dir2_kill_block(mp, ip, da_bno, bp);
1593                 } else {
1594                         do_warn(_("would junk block\n"));
1595                 }
1596                 freetab->ents[db].v = NULLDATAOFF;
1597                 return;
1598         }
1599
1600         /* update number of data blocks processed */
1601         if (freetab->nents < db + 1)
1602                 freetab->nents = db + 1;
1603
1604         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, 0, 0, 0, &tp);
1605         if (error)
1606                 res_failed(error);
1607         da.trans = tp;
1608         libxfs_trans_ijoin(tp, ip, 0);
1609         libxfs_trans_bjoin(tp, bp);
1610         libxfs_trans_bhold(tp, bp);
1611         if (be32_to_cpu(d->magic) != wantmagic) {
1612                 do_warn(
1613         _("bad directory block magic # %#x for directory inode %" PRIu64 " block %d: "),
1614                         be32_to_cpu(d->magic), ip->i_ino, da_bno);
1615                 if (!no_modify) {
1616                         do_warn(_("fixing magic # to %#x\n"), wantmagic);
1617                         d->magic = cpu_to_be32(wantmagic);
1618                         needlog = 1;
1619                 } else
1620                         do_warn(_("would fix magic # to %#x\n"), wantmagic);
1621         }
1622         lastfree = 0;
1623         ptr = (char *)d + mp->m_dir_geo->data_entry_offset;
1624         /*
1625          * look at each entry.  reference inode pointed to by each
1626          * entry in the incore inode tree.
1627          * if not a directory, set reached flag, increment link count
1628          * if a directory and reached, mark entry as to be deleted.
1629          * if a directory, check to see if recorded parent
1630          *      matches current inode #,
1631          *      if so, then set reached flag, increment link count
1632          *              of current and child dir inodes, push the child
1633          *              directory inode onto the directory stack.
1634          *      if current inode != parent, then mark entry to be deleted.
1635          */
1636         while (ptr < endptr) {
1637                 dup = (xfs_dir2_data_unused_t *)ptr;
1638                 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
1639                         if (lastfree) {
1640                                 do_warn(
1641         _("directory inode %" PRIu64 " block %u has consecutive free entries: "),
1642                                         ip->i_ino, da_bno);
1643                                 if (!no_modify) {
1644
1645                                         do_warn(_("joining together\n"));
1646                                         len = be16_to_cpu(dup->length);
1647                                         libxfs_dir2_data_use_free(&da, bp, dup,
1648                                                 ptr - (char *)d, len, &needlog,
1649                                                 &needscan);
1650                                         libxfs_dir2_data_make_free(&da, bp,
1651                                                 ptr - (char *)d, len, &needlog,
1652                                                 &needscan);
1653                                 } else
1654                                         do_warn(_("would join together\n"));
1655                         }
1656                         ptr += be16_to_cpu(dup->length);
1657                         lastfree = 1;
1658                         continue;
1659                 }
1660                 addr = xfs_dir2_db_off_to_dataptr(mp->m_dir_geo, db,
1661                                                   ptr - (char *)d);
1662                 dep = (xfs_dir2_data_entry_t *)ptr;
1663                 ptr += libxfs_dir2_data_entsize(mp, dep->namelen);
1664                 inum = be64_to_cpu(dep->inumber);
1665                 lastfree = 0;
1666                 /*
1667                  * skip bogus entries (leading '/').  they'll be deleted
1668                  * later.  must still log it, else we leak references to
1669                  * buffers.
1670                  */
1671                 if (dep->name[0] == '/')  {
1672                         nbad++;
1673                         if (!no_modify)
1674                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1675                         continue;
1676                 }
1677
1678                 memmove(fname, dep->name, dep->namelen);
1679                 fname[dep->namelen] = '\0';
1680                 ASSERT(inum != NULLFSINO);
1681
1682                 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, inum),
1683                                         XFS_INO_TO_AGINO(mp, inum));
1684                 if (irec == NULL)  {
1685                         nbad++;
1686                         if (entry_junked(
1687         _("entry \"%s\" in directory inode %" PRIu64 " points to non-existent inode %" PRIu64 ""),
1688                                         fname, ip->i_ino, inum)) {
1689                                 dep->name[0] = '/';
1690                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1691                         }
1692                         continue;
1693                 }
1694                 ino_offset = XFS_INO_TO_AGINO(mp, inum) - irec->ino_startnum;
1695
1696                 /*
1697                  * if it's a free inode, blow out the entry.
1698                  * by now, any inode that we think is free
1699                  * really is free.
1700                  */
1701                 if (is_inode_free(irec, ino_offset))  {
1702                         nbad++;
1703                         if (entry_junked(
1704         _("entry \"%s\" in directory inode %" PRIu64 " points to free inode %" PRIu64),
1705                                         fname, ip->i_ino, inum)) {
1706                                 dep->name[0] = '/';
1707                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1708                         }
1709                         continue;
1710                 }
1711
1712                 /*
1713                  * check if this inode is lost+found dir in the root
1714                  */
1715                 if (inum == mp->m_sb.sb_rootino && strcmp(fname, ORPHANAGE) == 0) {
1716                         /*
1717                          * if it's not a directory, trash it
1718                          */
1719                         if (!inode_isadir(irec, ino_offset)) {
1720                                 nbad++;
1721                                 if (entry_junked(
1722         _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"),
1723                                                 ORPHANAGE, inum, ip->i_ino)) {
1724                                         dep->name[0] = '/';
1725                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1726                                 }
1727                                 continue;
1728                         }
1729                         /*
1730                          * if this is a dup, it will be picked up below,
1731                          * otherwise, mark it as the orphanage for later.
1732                          */
1733                         if (!orphanage_ino)
1734                                 orphanage_ino = inum;
1735                 }
1736
1737                 /*
1738                  * check for duplicate names in directory.
1739                  */
1740                 if (!dir_hash_add(mp, hashtab, addr, inum, dep->namelen,
1741                                 dep->name, libxfs_dir2_data_get_ftype(mp, dep))) {
1742                         nbad++;
1743                         if (entry_junked(
1744         _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"),
1745                                         fname, inum, ip->i_ino)) {
1746                                 dep->name[0] = '/';
1747                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1748                         }
1749                         if (inum == orphanage_ino)
1750                                 orphanage_ino = 0;
1751                         continue;
1752                 }
1753
1754                 /*
1755                  * if just scanning to rebuild a directory due to a ".."
1756                  * update, just continue
1757                  */
1758                 if (dotdot_update)
1759                         continue;
1760
1761                 /*
1762                  * skip the '..' entry since it's checked when the
1763                  * directory is reached by something else.  if it never
1764                  * gets reached, it'll be moved to the orphanage and we'll
1765                  * take care of it then. If it doesn't exist at all, the
1766                  * directory needs to be rebuilt first before being added
1767                  * to the orphanage.
1768                  */
1769                 if (dep->namelen == 2 && dep->name[0] == '.' &&
1770                                 dep->name[1] == '.') {
1771                         if (da_bno != 0) {
1772                                 /* ".." should be in the first block */
1773                                 nbad++;
1774                                 if (entry_junked(
1775         _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is not in the the first block"), fname,
1776                                                 inum, ip->i_ino)) {
1777                                         dir_hash_junkit(hashtab, addr);
1778                                         dep->name[0] = '/';
1779                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1780                                 }
1781                         }
1782
1783                         if (!nbad)
1784                                 check_longform_ftype(mp, ip, dep, irec,
1785                                                 ino_offset, hashtab, addr, &da,
1786                                                 bp);
1787                         continue;
1788                 }
1789                 ASSERT(no_modify || libxfs_verify_dir_ino(mp, inum));
1790                 /*
1791                  * special case the . entry.  we know there's only one
1792                  * '.' and only '.' points to itself because bogus entries
1793                  * got trashed in phase 3 if there were > 1.
1794                  * bump up link count for '.' but don't set reached
1795                  * until we're actually reached by another directory
1796                  * '..' is already accounted for or will be taken care
1797                  * of when directory is moved to orphanage.
1798                  */
1799                 if (ip->i_ino == inum)  {
1800                         ASSERT(no_modify ||
1801                                (dep->name[0] == '.' && dep->namelen == 1));
1802                         add_inode_ref(current_irec, current_ino_offset);
1803                         if (da_bno != 0 ||
1804                             dep != (void *)d + mp->m_dir_geo->data_entry_offset) {
1805                                 /* "." should be the first entry */
1806                                 nbad++;
1807                                 if (entry_junked(
1808         _("entry \"%s\" in dir %" PRIu64 " is not the first entry"),
1809                                                 fname, inum, ip->i_ino)) {
1810                                         dir_hash_junkit(hashtab, addr);
1811                                         dep->name[0] = '/';
1812                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1813                                 }
1814                         }
1815
1816                         if (!nbad)
1817                                 check_longform_ftype(mp, ip, dep, irec,
1818                                                 ino_offset, hashtab, addr, &da,
1819                                                 bp);
1820                         *need_dot = 0;
1821                         continue;
1822                 }
1823                 /*
1824                  * skip entries with bogus inumbers if we're in no modify mode
1825                  */
1826                 if (no_modify && !libxfs_verify_dir_ino(mp, inum))
1827                         continue;
1828
1829                 /* validate ftype field if supported */
1830                 check_longform_ftype(mp, ip, dep, irec, ino_offset, hashtab,
1831                                 addr, &da, bp);
1832
1833                 /*
1834                  * check easy case first, regular inode, just bump
1835                  * the link count and continue
1836                  */
1837                 if (!inode_isadir(irec, ino_offset))  {
1838                         add_inode_reached(irec, ino_offset);
1839                         continue;
1840                 }
1841                 parent = get_inode_parent(irec, ino_offset);
1842                 ASSERT(parent != 0);
1843                 junkit = 0;
1844                 /*
1845                  * bump up the link counts in parent and child
1846                  * directory but if the link doesn't agree with
1847                  * the .. in the child, blow out the entry.
1848                  * if the directory has already been reached,
1849                  * blow away the entry also.
1850                  */
1851                 if (is_inode_reached(irec, ino_offset))  {
1852                         junkit = 1;
1853                         do_warn(
1854 _("entry \"%s\" in dir %" PRIu64" points to an already connected directory inode %" PRIu64 "\n"),
1855                                 fname, ip->i_ino, inum);
1856                 } else if (parent == ip->i_ino)  {
1857                         add_inode_reached(irec, ino_offset);
1858                         add_inode_ref(current_irec, current_ino_offset);
1859                 } else if (parent == NULLFSINO) {
1860                         /* ".." was missing, but this entry refers to it,
1861                            so, set it as the parent and mark for rebuild */
1862                         do_warn(
1863         _("entry \"%s\" in dir ino %" PRIu64 " doesn't have a .. entry, will set it in ino %" PRIu64 ".\n"),
1864                                 fname, ip->i_ino, inum);
1865                         set_inode_parent(irec, ino_offset, ip->i_ino);
1866                         add_inode_reached(irec, ino_offset);
1867                         add_inode_ref(current_irec, current_ino_offset);
1868                         add_dotdot_update(XFS_INO_TO_AGNO(mp, inum), irec,
1869                                                                 ino_offset);
1870                 } else  {
1871                         junkit = 1;
1872                         do_warn(
1873 _("entry \"%s\" in dir inode %" PRIu64 " inconsistent with .. value (%" PRIu64 ") in ino %" PRIu64 "\n"),
1874                                 fname, ip->i_ino, parent, inum);
1875                 }
1876                 if (junkit)  {
1877                         if (inum == orphanage_ino)
1878                                 orphanage_ino = 0;
1879                         nbad++;
1880                         if (!no_modify)  {
1881                                 dir_hash_junkit(hashtab, addr);
1882                                 dep->name[0] = '/';
1883                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1884                                 if (verbose)
1885                                         do_warn(
1886                                         _("\twill clear entry \"%s\"\n"),
1887                                                 fname);
1888                         } else  {
1889                                 do_warn(_("\twould clear entry \"%s\"\n"),
1890                                         fname);
1891                         }
1892                 }
1893         }
1894         *num_illegal += nbad;
1895         if (needscan)
1896                 libxfs_dir2_data_freescan(mp, d, &i);
1897         if (needlog)
1898                 libxfs_dir2_data_log_header(&da, bp);
1899         error = -libxfs_trans_commit(tp);
1900         if (error)
1901                 do_error(
1902 _("directory block fixing failed (%d)\n"), error);
1903
1904         /* record the largest free space in the freetab for later checking */
1905         bf = libxfs_dir2_data_bestfree_p(mp, d);
1906         freetab->ents[db].v = be16_to_cpu(bf[0].length);
1907         freetab->ents[db].s = 0;
1908 }
1909
1910 /* check v5 metadata */
1911 static int
1912 __check_dir3_header(
1913         struct xfs_mount        *mp,
1914         struct xfs_buf          *bp,
1915         xfs_ino_t               ino,
1916         __be64                  owner,
1917         __be64                  blkno,
1918         uuid_t                  *uuid)
1919 {
1920
1921         /* verify owner */
1922         if (be64_to_cpu(owner) != ino) {
1923                 do_warn(
1924 _("expected owner inode %" PRIu64 ", got %llu, directory block %" PRIu64 "\n"),
1925                         ino, (unsigned long long)be64_to_cpu(owner), xfs_buf_daddr(bp));
1926                 return 1;
1927         }
1928         /* verify block number */
1929         if (be64_to_cpu(blkno) != xfs_buf_daddr(bp)) {
1930                 do_warn(
1931 _("expected block %" PRIu64 ", got %llu, directory inode %" PRIu64 "\n"),
1932                         xfs_buf_daddr(bp), (unsigned long long)be64_to_cpu(blkno), ino);
1933                 return 1;
1934         }
1935         /* verify uuid */
1936         if (platform_uuid_compare(uuid, &mp->m_sb.sb_meta_uuid) != 0) {
1937                 do_warn(
1938 _("wrong FS UUID, directory inode %" PRIu64 " block %" PRIu64 "\n"),
1939                         ino, xfs_buf_daddr(bp));
1940                 return 1;
1941         }
1942
1943         return 0;
1944 }
1945
1946 static int
1947 check_da3_header(
1948         struct xfs_mount        *mp,
1949         struct xfs_buf          *bp,
1950         xfs_ino_t               ino)
1951 {
1952         struct xfs_da3_blkinfo  *info = bp->b_addr;
1953
1954         return __check_dir3_header(mp, bp, ino, info->owner, info->blkno,
1955                                    &info->uuid);
1956 }
1957
1958 static int
1959 check_dir3_header(
1960         struct xfs_mount        *mp,
1961         struct xfs_buf          *bp,
1962         xfs_ino_t               ino)
1963 {
1964         struct xfs_dir3_blk_hdr *info = bp->b_addr;
1965
1966         return __check_dir3_header(mp, bp, ino, info->owner, info->blkno,
1967                                    &info->uuid);
1968 }
1969
1970 /*
1971  * Check contents of leaf-form block.
1972  */
1973 static int
1974 longform_dir2_check_leaf(
1975         struct xfs_mount        *mp,
1976         struct xfs_inode        *ip,
1977         struct dir_hash_tab     *hashtab,
1978         struct freetab          *freetab)
1979 {
1980         int                     badtail;
1981         __be16                  *bestsp;
1982         struct xfs_buf          *bp;
1983         xfs_dablk_t             da_bno;
1984         int                     i;
1985         xfs_dir2_leaf_t         *leaf;
1986         xfs_dir2_leaf_tail_t    *ltp;
1987         int                     seeval;
1988         struct xfs_dir2_leaf_entry *ents;
1989         struct xfs_dir3_icleaf_hdr leafhdr;
1990         int                     error;
1991         int                     fixit = 0;
1992
1993         da_bno = mp->m_dir_geo->leafblk;
1994         error = dir_read_buf(ip, da_bno, &bp, &xfs_dir3_leaf1_buf_ops, &fixit);
1995         if (error == EFSBADCRC || error == EFSCORRUPTED || fixit) {
1996                 do_warn(
1997         _("leaf block %u for directory inode %" PRIu64 " bad CRC\n"),
1998                         da_bno, ip->i_ino);
1999                 return 1;
2000         } else if (error) {
2001                 do_error(
2002         _("can't read block %u for directory inode %" PRIu64 ", error %d\n"),
2003                         da_bno, ip->i_ino, error);
2004                 /* NOTREACHED */
2005         }
2006
2007         leaf = bp->b_addr;
2008         libxfs_dir2_leaf_hdr_from_disk(mp, &leafhdr, leaf);
2009         ents = leafhdr.ents;
2010         ltp = xfs_dir2_leaf_tail_p(mp->m_dir_geo, leaf);
2011         bestsp = xfs_dir2_leaf_bests_p(ltp);
2012         if (!(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC ||
2013               leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) ||
2014                                 leafhdr.forw || leafhdr.back ||
2015                                 leafhdr.count < leafhdr.stale ||
2016                                 leafhdr.count > mp->m_dir_geo->leaf_max_ents ||
2017                                 (char *)&ents[leafhdr.count] > (char *)bestsp) {
2018                 do_warn(
2019         _("leaf block %u for directory inode %" PRIu64 " bad header\n"),
2020                         da_bno, ip->i_ino);
2021                 libxfs_buf_relse(bp);
2022                 return 1;
2023         }
2024
2025         if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) {
2026                 error = check_da3_header(mp, bp, ip->i_ino);
2027                 if (error) {
2028                         libxfs_buf_relse(bp);
2029                         return error;
2030                 }
2031         }
2032
2033         seeval = dir_hash_see_all(hashtab, ents, leafhdr.count, leafhdr.stale);
2034         if (dir_hash_check(hashtab, ip, seeval)) {
2035                 libxfs_buf_relse(bp);
2036                 return 1;
2037         }
2038         badtail = freetab->nents != be32_to_cpu(ltp->bestcount);
2039         for (i = 0; !badtail && i < be32_to_cpu(ltp->bestcount); i++) {
2040                 freetab->ents[i].s = 1;
2041                 badtail = freetab->ents[i].v != be16_to_cpu(bestsp[i]);
2042         }
2043         if (badtail) {
2044                 do_warn(
2045         _("leaf block %u for directory inode %" PRIu64 " bad tail\n"),
2046                         da_bno, ip->i_ino);
2047                 libxfs_buf_relse(bp);
2048                 return 1;
2049         }
2050         libxfs_buf_relse(bp);
2051         return fixit;
2052 }
2053
2054 /*
2055  * Check contents of the node blocks (leaves)
2056  * Looks for matching hash values for the data entries.
2057  */
2058 static int
2059 longform_dir2_check_node(
2060         struct xfs_mount        *mp,
2061         struct xfs_inode        *ip,
2062         struct dir_hash_tab     *hashtab,
2063         struct freetab          *freetab)
2064 {
2065         struct xfs_buf          *bp;
2066         xfs_dablk_t             da_bno;
2067         xfs_dir2_db_t           fdb;
2068         xfs_dir2_free_t         *free;
2069         int                     i;
2070         xfs_dir2_leaf_t         *leaf;
2071         xfs_fileoff_t           next_da_bno;
2072         int                     seeval = 0;
2073         int                     used;
2074         struct xfs_dir2_leaf_entry *ents;
2075         struct xfs_dir3_icleaf_hdr leafhdr;
2076         struct xfs_dir3_icfree_hdr freehdr;
2077         __be16                  *bests;
2078         int                     error;
2079         int                     fixit = 0;
2080
2081         for (da_bno = mp->m_dir_geo->leafblk, next_da_bno = 0;
2082                         next_da_bno != NULLFILEOFF && da_bno < mp->m_dir_geo->freeblk;
2083                         da_bno = (xfs_dablk_t)next_da_bno) {
2084                 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2085                 if (bmap_next_offset(ip, &next_da_bno))
2086                         break;
2087
2088                 /*
2089                  * we need to use the da3 node verifier here as it handles the
2090                  * fact that reading the leaf hash tree blocks can return either
2091                  * leaf or node blocks and calls the correct verifier. If we get
2092                  * a node block, then we'll skip it below based on a magic
2093                  * number check.
2094                  */
2095                 error = dir_read_buf(ip, da_bno, &bp, &xfs_da3_node_buf_ops,
2096                                 &fixit);
2097                 if (error) {
2098                         do_warn(
2099         _("can't read leaf block %u for directory inode %" PRIu64 ", error %d\n"),
2100                                 da_bno, ip->i_ino, error);
2101                         return 1;
2102                 }
2103                 leaf = bp->b_addr;
2104                 libxfs_dir2_leaf_hdr_from_disk(mp, &leafhdr, leaf);
2105                 ents = leafhdr.ents;
2106                 if (!(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
2107                       leafhdr.magic == XFS_DIR3_LEAFN_MAGIC ||
2108                       leafhdr.magic == XFS_DA_NODE_MAGIC ||
2109                       leafhdr.magic == XFS_DA3_NODE_MAGIC)) {
2110                         do_warn(
2111         _("unknown magic number %#x for block %u in directory inode %" PRIu64 "\n"),
2112                                 leafhdr.magic, da_bno, ip->i_ino);
2113                         libxfs_buf_relse(bp);
2114                         return 1;
2115                 }
2116
2117                 /* check v5 metadata */
2118                 if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC ||
2119                     leafhdr.magic == XFS_DA3_NODE_MAGIC) {
2120                         error = check_da3_header(mp, bp, ip->i_ino);
2121                         if (error) {
2122                                 libxfs_buf_relse(bp);
2123                                 return error;
2124                         }
2125                 }
2126
2127                 /* ignore nodes */
2128                 if (leafhdr.magic == XFS_DA_NODE_MAGIC ||
2129                     leafhdr.magic == XFS_DA3_NODE_MAGIC) {
2130                         libxfs_buf_relse(bp);
2131                         continue;
2132                 }
2133
2134                 /*
2135                  * If there's a validator error, we need to ensure that we got
2136                  * the right ops on the buffer for when we write it back out.
2137                  */
2138                 bp->b_ops = &xfs_dir3_leafn_buf_ops;
2139                 if (leafhdr.count > mp->m_dir_geo->leaf_max_ents ||
2140                     leafhdr.count < leafhdr.stale) {
2141                         do_warn(
2142         _("leaf block %u for directory inode %" PRIu64 " bad header\n"),
2143                                 da_bno, ip->i_ino);
2144                         libxfs_buf_relse(bp);
2145                         return 1;
2146                 }
2147                 seeval = dir_hash_see_all(hashtab, ents,
2148                                         leafhdr.count, leafhdr.stale);
2149                 libxfs_buf_relse(bp);
2150                 if (seeval != DIR_HASH_CK_OK)
2151                         return 1;
2152         }
2153         if (dir_hash_check(hashtab, ip, seeval))
2154                 return 1;
2155
2156         for (da_bno = mp->m_dir_geo->freeblk, next_da_bno = 0;
2157              next_da_bno != NULLFILEOFF;
2158              da_bno = (xfs_dablk_t)next_da_bno) {
2159                 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2160                 if (bmap_next_offset(ip, &next_da_bno))
2161                         break;
2162
2163                 error = dir_read_buf(ip, da_bno, &bp, &xfs_dir3_free_buf_ops,
2164                                 &fixit);
2165                 if (error) {
2166                         do_warn(
2167         _("can't read freespace block %u for directory inode %" PRIu64 ", error %d\n"),
2168                                 da_bno, ip->i_ino, error);
2169                         return 1;
2170                 }
2171                 free = bp->b_addr;
2172                 libxfs_dir2_free_hdr_from_disk(mp, &freehdr, free);
2173                 bests = freehdr.bests;
2174                 fdb = xfs_dir2_da_to_db(mp->m_dir_geo, da_bno);
2175                 if (!(freehdr.magic == XFS_DIR2_FREE_MAGIC ||
2176                       freehdr.magic == XFS_DIR3_FREE_MAGIC) ||
2177                     freehdr.firstdb !=
2178                         (fdb - xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) *
2179                         mp->m_dir_geo->free_max_bests ||
2180                     freehdr.nvalid < freehdr.nused) {
2181                         do_warn(
2182         _("free block %u for directory inode %" PRIu64 " bad header\n"),
2183                                 da_bno, ip->i_ino);
2184                         libxfs_buf_relse(bp);
2185                         return 1;
2186                 }
2187
2188                 if (freehdr.magic == XFS_DIR3_FREE_MAGIC) {
2189                         error = check_dir3_header(mp, bp, ip->i_ino);
2190                         if (error) {
2191                                 libxfs_buf_relse(bp);
2192                                 return error;
2193                         }
2194                 }
2195                 for (i = used = 0; i < freehdr.nvalid; i++) {
2196                         if (i + freehdr.firstdb >= freetab->nents ||
2197                                         freetab->ents[i + freehdr.firstdb].v !=
2198                                                 be16_to_cpu(bests[i])) {
2199                                 do_warn(
2200         _("free block %u entry %i for directory ino %" PRIu64 " bad\n"),
2201                                         da_bno, i, ip->i_ino);
2202                                 libxfs_buf_relse(bp);
2203                                 return 1;
2204                         }
2205                         used += be16_to_cpu(bests[i]) != NULLDATAOFF;
2206                         freetab->ents[i + freehdr.firstdb].s = 1;
2207                 }
2208                 if (used != freehdr.nused) {
2209                         do_warn(
2210         _("free block %u for directory inode %" PRIu64 " bad nused\n"),
2211                                 da_bno, ip->i_ino);
2212                         libxfs_buf_relse(bp);
2213                         return 1;
2214                 }
2215                 libxfs_buf_relse(bp);
2216         }
2217         for (i = 0; i < freetab->nents; i++) {
2218                 if ((freetab->ents[i].s == 0) &&
2219                     (freetab->ents[i].v != NULLDATAOFF)) {
2220                         do_warn(
2221         _("missing freetab entry %u for directory inode %" PRIu64 "\n"),
2222                                 i, ip->i_ino);
2223                         return 1;
2224                 }
2225         }
2226         return fixit;
2227 }
2228
2229 /*
2230  * If a directory is corrupt, we need to read in as many entries as possible,
2231  * destroy the entry and create a new one with recovered name/inode pairs.
2232  * (ie. get libxfs to do all the grunt work)
2233  */
2234 static void
2235 longform_dir2_entry_check(
2236         struct xfs_mount        *mp,
2237         xfs_ino_t               ino,
2238         struct xfs_inode        *ip,
2239         int                     *num_illegal,
2240         int                     *need_dot,
2241         struct ino_tree_node    *irec,
2242         int                     ino_offset,
2243         struct dir_hash_tab     *hashtab)
2244 {
2245         struct xfs_buf          *bp = NULL;
2246         xfs_dablk_t             da_bno;
2247         freetab_t               *freetab;
2248         int                     i;
2249         int                     isblock;
2250         int                     isleaf;
2251         xfs_fileoff_t           next_da_bno;
2252         int                     seeval;
2253         int                     fixit = 0;
2254         struct xfs_da_args      args;
2255
2256         *need_dot = 1;
2257         freetab = malloc(FREETAB_SIZE(ip->i_disk_size / mp->m_dir_geo->blksize));
2258         if (!freetab) {
2259                 do_error(_("malloc failed in %s (%" PRId64 " bytes)\n"),
2260                         __func__,
2261                         FREETAB_SIZE(ip->i_disk_size / mp->m_dir_geo->blksize));
2262                 exit(1);
2263         }
2264         freetab->naents = ip->i_disk_size / mp->m_dir_geo->blksize;
2265         freetab->nents = 0;
2266         for (i = 0; i < freetab->naents; i++) {
2267                 freetab->ents[i].v = NULLDATAOFF;
2268                 freetab->ents[i].s = 0;
2269         }
2270
2271         /* is this a block, leaf, or node directory? */
2272         args.dp = ip;
2273         args.geo = mp->m_dir_geo;
2274         libxfs_dir2_isblock(&args, &isblock);
2275         libxfs_dir2_isleaf(&args, &isleaf);
2276
2277         /* check directory "data" blocks (ie. name/inode pairs) */
2278         for (da_bno = 0, next_da_bno = 0;
2279              next_da_bno != NULLFILEOFF && da_bno < mp->m_dir_geo->leafblk;
2280              da_bno = (xfs_dablk_t)next_da_bno) {
2281                 const struct xfs_buf_ops *ops;
2282                 int                      error;
2283                 struct xfs_dir2_data_hdr *d;
2284
2285                 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2286                 if (bmap_next_offset(ip, &next_da_bno)) {
2287                         /*
2288                          * if this is the first block, there isn't anything we
2289                          * can recover so we just trash it.
2290                          */
2291                          if (da_bno == 0) {
2292                                 fixit++;
2293                                 goto out_fix;
2294                         }
2295                         break;
2296                 }
2297
2298                 if (isblock)
2299                         ops = &xfs_dir3_block_buf_ops;
2300                 else
2301                         ops = &xfs_dir3_data_buf_ops;
2302
2303                 error = dir_read_buf(ip, da_bno, &bp, ops, &fixit);
2304                 if (error) {
2305                         do_warn(
2306         _("can't read data block %u for directory inode %" PRIu64 " error %d\n"),
2307                                 da_bno, ino, error);
2308                         *num_illegal += 1;
2309
2310                         /*
2311                          * we try to read all "data" blocks, but if we are in
2312                          * block form and we fail, there isn't anything else to
2313                          * read, and nothing we can do but trash it.
2314                          */
2315                         if (isblock) {
2316                                 fixit++;
2317                                 goto out_fix;
2318                         }
2319                         continue;
2320                 }
2321
2322                 /* check v5 metadata */
2323                 d = bp->b_addr;
2324                 if (be32_to_cpu(d->magic) == XFS_DIR3_BLOCK_MAGIC ||
2325                     be32_to_cpu(d->magic) == XFS_DIR3_DATA_MAGIC) {
2326                         error = check_dir3_header(mp, bp, ino);
2327                         if (error) {
2328                                 fixit++;
2329                                 if (isblock)
2330                                         goto out_fix;
2331                                 continue;
2332                         }
2333                 }
2334
2335                 longform_dir2_entry_check_data(mp, ip, num_illegal, need_dot,
2336                                 irec, ino_offset, bp, hashtab,
2337                                 &freetab, da_bno, isblock);
2338                 if (isblock)
2339                         break;
2340
2341                 libxfs_buf_relse(bp);
2342         }
2343         fixit |= (*num_illegal != 0) || dir2_is_badino(ino) || *need_dot;
2344
2345         if (!dotdot_update) {
2346                 /* check btree and freespace */
2347                 if (isblock) {
2348                         struct xfs_dir2_data_hdr *block;
2349                         xfs_dir2_block_tail_t   *btp;
2350                         xfs_dir2_leaf_entry_t   *blp;
2351
2352                         block = bp->b_addr;
2353                         btp = xfs_dir2_block_tail_p(mp->m_dir_geo, block);
2354                         blp = xfs_dir2_block_leaf_p(btp);
2355                         seeval = dir_hash_see_all(hashtab, blp,
2356                                                 be32_to_cpu(btp->count),
2357                                                 be32_to_cpu(btp->stale));
2358                         if (dir_hash_check(hashtab, ip, seeval))
2359                                 fixit |= 1;
2360                 } else if (isleaf) {
2361                         fixit |= longform_dir2_check_leaf(mp, ip, hashtab,
2362                                                                 freetab);
2363                 } else {
2364                         fixit |= longform_dir2_check_node(mp, ip, hashtab,
2365                                                                 freetab);
2366                 }
2367         }
2368 out_fix:
2369         if (isblock && bp)
2370                 libxfs_buf_relse(bp);
2371
2372         if (!no_modify && (fixit || dotdot_update)) {
2373                 longform_dir2_rebuild(mp, ino, ip, irec, ino_offset, hashtab);
2374                 *num_illegal = 0;
2375                 *need_dot = 0;
2376         } else {
2377                 if (fixit || dotdot_update)
2378                         do_warn(
2379         _("would rebuild directory inode %" PRIu64 "\n"), ino);
2380         }
2381
2382         free(freetab);
2383 }
2384
2385 /*
2386  * shortform directory v2 processing routines -- entry verification and
2387  * bad entry deletion (pruning).
2388  */
2389 static struct xfs_dir2_sf_entry *
2390 shortform_dir2_junk(
2391         struct xfs_mount        *mp,
2392         struct xfs_dir2_sf_hdr  *sfp,
2393         struct xfs_dir2_sf_entry *sfep,
2394         xfs_ino_t               lino,
2395         int                     *max_size,
2396         int                     *index,
2397         int                     *bytes_deleted,
2398         int                     *ino_dirty)
2399 {
2400         struct xfs_dir2_sf_entry *next_sfep;
2401         int                     next_len;
2402         int                     next_elen;
2403
2404         if (lino == orphanage_ino)
2405                 orphanage_ino = 0;
2406
2407         next_elen = libxfs_dir2_sf_entsize(mp, sfp, sfep->namelen);
2408         next_sfep = libxfs_dir2_sf_nextentry(mp, sfp, sfep);
2409
2410         /*
2411          * if we are just checking, simply return the pointer to the next entry
2412          * here so that the checking loop can continue.
2413          */
2414         if (no_modify) {
2415                 do_warn(_("would junk entry\n"));
2416                 return next_sfep;
2417         }
2418
2419         /*
2420          * now move all the remaining entries down over the junked entry and
2421          * clear the newly unused bytes at the tail of the directory region.
2422          */
2423         next_len = *max_size - ((intptr_t)next_sfep - (intptr_t)sfp);
2424         *max_size -= next_elen;
2425         *bytes_deleted += next_elen;
2426
2427         memmove(sfep, next_sfep, next_len);
2428         memset((void *)((intptr_t)sfep + next_len), 0, next_elen);
2429         sfp->count -= 1;
2430         *ino_dirty = 1;
2431
2432         /*
2433          * WARNING:  drop the index i by one so it matches the decremented count
2434          * for accurate comparisons in the loop test
2435          */
2436         (*index)--;
2437
2438         if (verbose)
2439                 do_warn(_("junking entry\n"));
2440         else
2441                 do_warn("\n");
2442         return sfep;
2443 }
2444
2445 static void
2446 shortform_dir2_entry_check(
2447         struct xfs_mount        *mp,
2448         xfs_ino_t               ino,
2449         struct xfs_inode        *ip,
2450         int                     *ino_dirty,
2451         struct ino_tree_node    *current_irec,
2452         int                     current_ino_offset,
2453         struct dir_hash_tab     *hashtab)
2454 {
2455         xfs_ino_t               lino;
2456         xfs_ino_t               parent;
2457         struct xfs_dir2_sf_hdr  *sfp;
2458         struct xfs_dir2_sf_entry *sfep;
2459         struct xfs_dir2_sf_entry *next_sfep;
2460         struct xfs_ifork        *ifp;
2461         struct ino_tree_node    *irec;
2462         int                     max_size;
2463         int                     ino_offset;
2464         int                     i;
2465         int                     bad_sfnamelen;
2466         int                     namelen;
2467         int                     bytes_deleted;
2468         char                    fname[MAXNAMELEN + 1];
2469         int                     i8;
2470
2471         ifp = &ip->i_df;
2472         sfp = (struct xfs_dir2_sf_hdr *) ifp->if_u1.if_data;
2473         *ino_dirty = 0;
2474         bytes_deleted = 0;
2475
2476         max_size = ifp->if_bytes;
2477         ASSERT(ip->i_disk_size <= ifp->if_bytes);
2478
2479         /*
2480          * if just rebuild a directory due to a "..", update and return
2481          */
2482         if (dotdot_update) {
2483                 parent = get_inode_parent(current_irec, current_ino_offset);
2484                 if (no_modify) {
2485                         do_warn(
2486         _("would set .. in sf dir inode %" PRIu64 " to %" PRIu64 "\n"),
2487                                 ino, parent);
2488                 } else {
2489                         do_warn(
2490         _("setting .. in sf dir inode %" PRIu64 " to %" PRIu64 "\n"),
2491                                 ino, parent);
2492                         libxfs_dir2_sf_put_parent_ino(sfp, parent);
2493                         *ino_dirty = 1;
2494                 }
2495                 return;
2496         }
2497
2498         /*
2499          * no '.' entry in shortform dirs, just bump up ref count by 1
2500          * '..' was already (or will be) accounted for and checked when
2501          * the directory is reached or will be taken care of when the
2502          * directory is moved to orphanage.
2503          */
2504         add_inode_ref(current_irec, current_ino_offset);
2505
2506         /*
2507          * Initialise i8 counter -- the parent inode number counts as well.
2508          */
2509         i8 = libxfs_dir2_sf_get_parent_ino(sfp) > XFS_DIR2_MAX_SHORT_INUM;
2510
2511         /*
2512          * now run through entries, stop at first bad entry, don't need
2513          * to skip over '..' since that's encoded in its own field and
2514          * no need to worry about '.' since it doesn't exist.
2515          */
2516         sfep = next_sfep = xfs_dir2_sf_firstentry(sfp);
2517
2518         for (i = 0; i < sfp->count && max_size >
2519                                         (intptr_t)next_sfep - (intptr_t)sfp;
2520                         sfep = next_sfep, i++)  {
2521                 bad_sfnamelen = 0;
2522
2523                 lino = libxfs_dir2_sf_get_ino(mp, sfp, sfep);
2524
2525                 namelen = sfep->namelen;
2526
2527                 ASSERT(no_modify || namelen > 0);
2528
2529                 if (no_modify && namelen == 0)  {
2530                         /*
2531                          * if we're really lucky, this is
2532                          * the last entry in which case we
2533                          * can use the dir size to set the
2534                          * namelen value.  otherwise, forget
2535                          * it because we're not going to be
2536                          * able to find the next entry.
2537                          */
2538                         bad_sfnamelen = 1;
2539
2540                         if (i == sfp->count - 1)  {
2541                                 namelen = ip->i_disk_size -
2542                                         ((intptr_t) &sfep->name[0] -
2543                                          (intptr_t) sfp);
2544                         } else  {
2545                                 /*
2546                                  * don't process the rest of the directory,
2547                                  * break out of processing loop
2548                                  */
2549                                 break;
2550                         }
2551                 } else if (no_modify && (intptr_t) sfep - (intptr_t) sfp +
2552                                 + libxfs_dir2_sf_entsize(mp, sfp, sfep->namelen)
2553                                 > ip->i_disk_size)  {
2554                         bad_sfnamelen = 1;
2555
2556                         if (i == sfp->count - 1)  {
2557                                 namelen = ip->i_disk_size -
2558                                         ((intptr_t) &sfep->name[0] -
2559                                          (intptr_t) sfp);
2560                         } else  {
2561                                 /*
2562                                  * don't process the rest of the directory,
2563                                  * break out of processing loop
2564                                  */
2565                                 break;
2566                         }
2567                 }
2568
2569                 memmove(fname, sfep->name, sfep->namelen);
2570                 fname[sfep->namelen] = '\0';
2571
2572                 ASSERT(no_modify || (lino != NULLFSINO && lino != 0));
2573                 ASSERT(no_modify || libxfs_verify_dir_ino(mp, lino));
2574
2575                 /*
2576                  * Also skip entries with bogus inode numbers if we're
2577                  * in no modify mode.
2578                  */
2579
2580                 if (no_modify && !libxfs_verify_dir_ino(mp, lino))  {
2581                         next_sfep = libxfs_dir2_sf_nextentry(mp, sfp, sfep);
2582                         continue;
2583                 }
2584
2585                 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, lino),
2586                                         XFS_INO_TO_AGINO(mp, lino));
2587
2588                 if (irec == NULL)  {
2589                         do_warn(
2590         _("entry \"%s\" in shortform directory %" PRIu64 " references non-existent inode %" PRIu64 "\n"),
2591                                 fname, ino, lino);
2592                         next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
2593                                                 &max_size, &i, &bytes_deleted,
2594                                                 ino_dirty);
2595                         continue;
2596                 }
2597
2598                 ino_offset = XFS_INO_TO_AGINO(mp, lino) - irec->ino_startnum;
2599
2600                 /*
2601                  * if it's a free inode, blow out the entry.
2602                  * by now, any inode that we think is free
2603                  * really is free.
2604                  */
2605                 if (is_inode_free(irec, ino_offset))  {
2606                         do_warn(
2607         _("entry \"%s\" in shortform directory inode %" PRIu64 " points to free inode %" PRIu64 "\n"),
2608                                 fname, ino, lino);
2609                         next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
2610                                                 &max_size, &i, &bytes_deleted,
2611                                                 ino_dirty);
2612                         continue;
2613                 }
2614                 /*
2615                  * check if this inode is lost+found dir in the root
2616                  */
2617                 if (ino == mp->m_sb.sb_rootino && strcmp(fname, ORPHANAGE) == 0) {
2618                         /*
2619                          * if it's not a directory, trash it
2620                          */
2621                         if (!inode_isadir(irec, ino_offset)) {
2622                                 do_warn(
2623         _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"),
2624                                         ORPHANAGE, lino, ino);
2625                                 next_sfep = shortform_dir2_junk(mp, sfp, sfep,
2626                                                 lino, &max_size, &i,
2627                                                 &bytes_deleted, ino_dirty);
2628                                 continue;
2629                         }
2630                         /*
2631                          * if this is a dup, it will be picked up below,
2632                          * otherwise, mark it as the orphanage for later.
2633                          */
2634                         if (!orphanage_ino)
2635                                 orphanage_ino = lino;
2636                 }
2637                 /*
2638                  * check for duplicate names in directory.
2639                  */
2640                 if (!dir_hash_add(mp, hashtab, (xfs_dir2_dataptr_t)
2641                                 (sfep - xfs_dir2_sf_firstentry(sfp)),
2642                                 lino, sfep->namelen, sfep->name,
2643                                 libxfs_dir2_sf_get_ftype(mp, sfep))) {
2644                         do_warn(
2645 _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"),
2646                                 fname, lino, ino);
2647                         next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
2648                                                 &max_size, &i, &bytes_deleted,
2649                                                 ino_dirty);
2650                         continue;
2651                 }
2652
2653                 if (!inode_isadir(irec, ino_offset))  {
2654                         /*
2655                          * check easy case first, regular inode, just bump
2656                          * the link count
2657                          */
2658                         add_inode_reached(irec, ino_offset);
2659                 } else  {
2660                         parent = get_inode_parent(irec, ino_offset);
2661
2662                         /*
2663                          * bump up the link counts in parent and child.
2664                          * directory but if the link doesn't agree with
2665                          * the .. in the child, blow out the entry
2666                          */
2667                         if (is_inode_reached(irec, ino_offset))  {
2668                                 do_warn(
2669         _("entry \"%s\" in directory inode %" PRIu64
2670           " references already connected inode %" PRIu64 ".\n"),
2671                                         fname, ino, lino);
2672                                 next_sfep = shortform_dir2_junk(mp, sfp, sfep,
2673                                                 lino, &max_size, &i,
2674                                                 &bytes_deleted, ino_dirty);
2675                                 continue;
2676                         } else if (parent == ino)  {
2677                                 add_inode_reached(irec, ino_offset);
2678                                 add_inode_ref(current_irec, current_ino_offset);
2679                         } else if (parent == NULLFSINO) {
2680                                 /* ".." was missing, but this entry refers to it,
2681                                 so, set it as the parent and mark for rebuild */
2682                                 do_warn(
2683         _("entry \"%s\" in dir ino %" PRIu64 " doesn't have a .. entry, will set it in ino %" PRIu64 ".\n"),
2684                                         fname, ino, lino);
2685                                 set_inode_parent(irec, ino_offset, ino);
2686                                 add_inode_reached(irec, ino_offset);
2687                                 add_inode_ref(current_irec, current_ino_offset);
2688                                 add_dotdot_update(XFS_INO_TO_AGNO(mp, lino),
2689                                                         irec, ino_offset);
2690                         } else  {
2691                                 do_warn(
2692         _("entry \"%s\" in directory inode %" PRIu64
2693           " not consistent with .. value (%" PRIu64
2694           ") in inode %" PRIu64 ",\n"),
2695                                         fname, ino, parent, lino);
2696                                 next_sfep = shortform_dir2_junk(mp, sfp, sfep,
2697                                                 lino, &max_size, &i,
2698                                                 &bytes_deleted, ino_dirty);
2699                                 continue;
2700                         }
2701                 }
2702
2703                 /* validate ftype field if supported */
2704                 if (xfs_has_ftype(mp)) {
2705                         uint8_t dir_ftype;
2706                         uint8_t ino_ftype;
2707
2708                         dir_ftype = libxfs_dir2_sf_get_ftype(mp, sfep);
2709                         ino_ftype = get_inode_ftype(irec, ino_offset);
2710
2711                         if (dir_ftype != ino_ftype) {
2712                                 if (no_modify) {
2713                                         do_warn(
2714         _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
2715                                                 dir_ftype, ino_ftype,
2716                                                 ino, lino);
2717                                 } else {
2718                                         do_warn(
2719         _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
2720                                                 dir_ftype, ino_ftype,
2721                                                 ino, lino);
2722                                         libxfs_dir2_sf_put_ftype(mp, sfep,
2723                                                                 ino_ftype);
2724                                         dir_hash_update_ftype(hashtab,
2725                         (xfs_dir2_dataptr_t)(sfep - xfs_dir2_sf_firstentry(sfp)),
2726                                                               ino_ftype);
2727                                         *ino_dirty = 1;
2728                                 }
2729                         }
2730                 }
2731
2732                 if (lino > XFS_DIR2_MAX_SHORT_INUM)
2733                         i8++;
2734
2735                 /*
2736                  * go onto next entry - we have to take entries with bad namelen
2737                  * into account in no modify mode since we calculate size based
2738                  * on next_sfep.
2739                  */
2740                 ASSERT(no_modify || bad_sfnamelen == 0);
2741                 next_sfep = (struct xfs_dir2_sf_entry *)((intptr_t)sfep +
2742                               (bad_sfnamelen
2743                                 ? libxfs_dir2_sf_entsize(mp, sfp, namelen)
2744                                 : libxfs_dir2_sf_entsize(mp, sfp, sfep->namelen)));
2745         }
2746
2747         if (sfp->i8count != i8) {
2748                 if (no_modify) {
2749                         do_warn(_("would fix i8count in inode %" PRIu64 "\n"),
2750                                 ino);
2751                 } else {
2752                         if (i8 == 0) {
2753                                 struct xfs_dir2_sf_entry *tmp_sfep;
2754
2755                                 tmp_sfep = next_sfep;
2756                                 process_sf_dir2_fixi8(mp, sfp, &tmp_sfep);
2757                                 bytes_deleted +=
2758                                         (intptr_t)next_sfep -
2759                                         (intptr_t)tmp_sfep;
2760                                 next_sfep = tmp_sfep;
2761                         } else
2762                                 sfp->i8count = i8;
2763                         *ino_dirty = 1;
2764                         do_warn(_("fixing i8count in inode %" PRIu64 "\n"),
2765                                 ino);
2766                 }
2767         }
2768
2769         /*
2770          * sync up sizes if required
2771          */
2772         if (*ino_dirty && bytes_deleted > 0)  {
2773                 ASSERT(!no_modify);
2774                 libxfs_idata_realloc(ip, -bytes_deleted, XFS_DATA_FORK);
2775                 ip->i_disk_size -= bytes_deleted;
2776         }
2777
2778         if (ip->i_disk_size != ip->i_df.if_bytes)  {
2779                 ASSERT(ip->i_df.if_bytes == (xfs_fsize_t)
2780                                 ((intptr_t) next_sfep - (intptr_t) sfp));
2781                 ip->i_disk_size = (xfs_fsize_t)
2782                                 ((intptr_t) next_sfep - (intptr_t) sfp);
2783                 do_warn(
2784         _("setting size to %" PRId64 " bytes to reflect junked entries\n"),
2785                         ip->i_disk_size);
2786                 *ino_dirty = 1;
2787         }
2788 }
2789
2790 /*
2791  * processes all reachable inodes in directories
2792  */
2793 static void
2794 process_dir_inode(
2795         struct xfs_mount        *mp,
2796         xfs_agnumber_t          agno,
2797         struct ino_tree_node    *irec,
2798         int                     ino_offset)
2799 {
2800         xfs_ino_t               ino;
2801         struct xfs_inode        *ip;
2802         struct xfs_trans        *tp;
2803         struct dir_hash_tab     *hashtab;
2804         int                     need_dot;
2805         int                     dirty, num_illegal, error, nres;
2806
2807         ino = XFS_AGINO_TO_INO(mp, agno, irec->ino_startnum + ino_offset);
2808
2809         /*
2810          * open up directory inode, check all entries,
2811          * then call prune_dir_entries to remove all
2812          * remaining illegal directory entries.
2813          */
2814
2815         ASSERT(!is_inode_refchecked(irec, ino_offset) || dotdot_update);
2816
2817         error = -libxfs_iget(mp, NULL, ino, 0, &ip);
2818         if (error) {
2819                 if (!no_modify)
2820                         do_error(
2821         _("couldn't map inode %" PRIu64 ", err = %d\n"),
2822                                 ino, error);
2823                 else  {
2824                         do_warn(
2825         _("couldn't map inode %" PRIu64 ", err = %d\n"),
2826                                 ino, error);
2827                         /*
2828                          * see below for what we're doing if this
2829                          * is root.  Why do we need to do this here?
2830                          * to ensure that the root doesn't show up
2831                          * as being disconnected in the no_modify case.
2832                          */
2833                         if (mp->m_sb.sb_rootino == ino)  {
2834                                 add_inode_reached(irec, 0);
2835                                 add_inode_ref(irec, 0);
2836                         }
2837                 }
2838
2839                 add_inode_refchecked(irec, 0);
2840                 return;
2841         }
2842
2843         need_dot = dirty = num_illegal = 0;
2844
2845         if (mp->m_sb.sb_rootino == ino)  {
2846                 /*
2847                  * mark root inode reached and bump up
2848                  * link count for root inode to account
2849                  * for '..' entry since the root inode is
2850                  * never reached by a parent.  we know
2851                  * that root's '..' is always good --
2852                  * guaranteed by phase 3 and/or below.
2853                  */
2854                 add_inode_reached(irec, ino_offset);
2855         }
2856
2857         add_inode_refchecked(irec, ino_offset);
2858
2859         hashtab = dir_hash_init(ip->i_disk_size);
2860
2861         /*
2862          * look for bogus entries
2863          */
2864         switch (ip->i_df.if_format)  {
2865                 case XFS_DINODE_FMT_EXTENTS:
2866                 case XFS_DINODE_FMT_BTREE:
2867                         /*
2868                          * also check for missing '.' in longform dirs.
2869                          * missing .. entries are added if required when
2870                          * the directory is connected to lost+found. but
2871                          * we need to create '.' entries here.
2872                          */
2873                         longform_dir2_entry_check(mp, ino, ip,
2874                                                 &num_illegal, &need_dot,
2875                                                 irec, ino_offset,
2876                                                 hashtab);
2877                         break;
2878
2879                 case XFS_DINODE_FMT_LOCAL:
2880                         /*
2881                          * using the remove reservation is overkill
2882                          * since at most we'll only need to log the
2883                          * inode but it's easier than wedging a
2884                          * new define in ourselves.
2885                          */
2886                         nres = no_modify ? 0 : XFS_REMOVE_SPACE_RES(mp);
2887                         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove,
2888                                                     nres, 0, 0, &tp);
2889                         if (error)
2890                                 res_failed(error);
2891
2892                         libxfs_trans_ijoin(tp, ip, 0);
2893
2894                         shortform_dir2_entry_check(mp, ino, ip, &dirty,
2895                                                 irec, ino_offset,
2896                                                 hashtab);
2897
2898                         ASSERT(dirty == 0 || (dirty && !no_modify));
2899                         if (dirty)  {
2900                                 libxfs_trans_log_inode(tp, ip,
2901                                         XFS_ILOG_CORE | XFS_ILOG_DDATA);
2902                                 error = -libxfs_trans_commit(tp);
2903                                 if (error)
2904                                         do_error(
2905 _("error %d fixing shortform directory %llu\n"),
2906                                                 error,
2907                                                 (unsigned long long)ip->i_ino);
2908                         } else  {
2909                                 libxfs_trans_cancel(tp);
2910                         }
2911                         break;
2912
2913                 default:
2914                         break;
2915         }
2916         dir_hash_done(hashtab);
2917
2918         /*
2919          * if we have to create a .. for /, do it now *before*
2920          * we delete the bogus entries, otherwise the directory
2921          * could transform into a shortform dir which would
2922          * probably cause the simulation to choke.  Even
2923          * if the illegal entries get shifted around, it's ok
2924          * because the entries are structurally intact and in
2925          * in hash-value order so the simulation won't get confused
2926          * if it has to move them around.
2927          */
2928         if (!no_modify && need_root_dotdot && ino == mp->m_sb.sb_rootino)  {
2929                 ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_LOCAL);
2930
2931                 do_warn(_("recreating root directory .. entry\n"));
2932
2933                 nres = XFS_MKDIR_SPACE_RES(mp, 2);
2934                 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir,
2935                                             nres, 0, 0, &tp);
2936                 if (error)
2937                         res_failed(error);
2938
2939                 libxfs_trans_ijoin(tp, ip, 0);
2940
2941                 error = -libxfs_dir_createname(tp, ip, &xfs_name_dotdot,
2942                                         ip->i_ino, nres);
2943                 if (error)
2944                         do_error(
2945         _("can't make \"..\" entry in root inode %" PRIu64 ", createname error %d\n"), ino, error);
2946
2947                 libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2948                 error = -libxfs_trans_commit(tp);
2949                 if (error)
2950                         do_error(
2951         _("root inode \"..\" entry recreation failed (%d)\n"), error);
2952
2953                 need_root_dotdot = 0;
2954         } else if (need_root_dotdot && ino == mp->m_sb.sb_rootino)  {
2955                 do_warn(_("would recreate root directory .. entry\n"));
2956         }
2957
2958         /*
2959          * if we need to create the '.' entry, do so only if
2960          * the directory is a longform dir.  if it's been
2961          * turned into a shortform dir, then the inode is ok
2962          * since shortform dirs have no '.' entry and the inode
2963          * has already been committed by prune_lf_dir_entry().
2964          */
2965         if (need_dot)  {
2966                 /*
2967                  * bump up our link count but don't
2968                  * bump up the inode link count.  chances
2969                  * are good that even though we lost '.'
2970                  * the inode link counts reflect '.' so
2971                  * leave the inode link count alone and if
2972                  * it turns out to be wrong, we'll catch
2973                  * that in phase 7.
2974                  */
2975                 add_inode_ref(irec, ino_offset);
2976
2977                 if (no_modify)  {
2978                         do_warn(
2979         _("would create missing \".\" entry in dir ino %" PRIu64 "\n"),
2980                                 ino);
2981                 } else if (ip->i_df.if_format != XFS_DINODE_FMT_LOCAL)  {
2982                         /*
2983                          * need to create . entry in longform dir.
2984                          */
2985                         do_warn(
2986         _("creating missing \".\" entry in dir ino %" PRIu64 "\n"), ino);
2987
2988                         nres = XFS_MKDIR_SPACE_RES(mp, 1);
2989                         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir,
2990                                                     nres, 0, 0, &tp);
2991                         if (error)
2992                                 res_failed(error);
2993
2994                         libxfs_trans_ijoin(tp, ip, 0);
2995
2996                         error = -libxfs_dir_createname(tp, ip, &xfs_name_dot,
2997                                         ip->i_ino, nres);
2998                         if (error)
2999                                 do_error(
3000         _("can't make \".\" entry in dir ino %" PRIu64 ", createname error %d\n"),
3001                                         ino, error);
3002
3003                         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
3004                         error = -libxfs_trans_commit(tp);
3005                         if (error)
3006                                 do_error(
3007         _("root inode \".\" entry recreation failed (%d)\n"), error);
3008                 }
3009         }
3010         libxfs_irele(ip);
3011 }
3012
3013 /*
3014  * mark realtime bitmap and summary inodes as reached.
3015  * quota inode will be marked here as well
3016  */
3017 static void
3018 mark_standalone_inodes(xfs_mount_t *mp)
3019 {
3020         ino_tree_node_t         *irec;
3021         int                     offset;
3022
3023         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rbmino),
3024                         XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rbmino));
3025
3026         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rbmino) -
3027                         irec->ino_startnum;
3028
3029         add_inode_reached(irec, offset);
3030
3031         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rsumino),
3032                         XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rsumino));
3033
3034         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rsumino) -
3035                         irec->ino_startnum;
3036
3037         add_inode_reached(irec, offset);
3038
3039         if (fs_quotas)  {
3040                 if (mp->m_sb.sb_uquotino
3041                                 && mp->m_sb.sb_uquotino != NULLFSINO)  {
3042                         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
3043                                                 mp->m_sb.sb_uquotino),
3044                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino));
3045                         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino)
3046                                         - irec->ino_startnum;
3047                         add_inode_reached(irec, offset);
3048                 }
3049                 if (mp->m_sb.sb_gquotino
3050                                 && mp->m_sb.sb_gquotino != NULLFSINO)  {
3051                         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
3052                                                 mp->m_sb.sb_gquotino),
3053                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_gquotino));
3054                         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_gquotino)
3055                                         - irec->ino_startnum;
3056                         add_inode_reached(irec, offset);
3057                 }
3058                 if (mp->m_sb.sb_pquotino
3059                                 && mp->m_sb.sb_pquotino != NULLFSINO)  {
3060                         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
3061                                                 mp->m_sb.sb_pquotino),
3062                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino));
3063                         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino)
3064                                         - irec->ino_startnum;
3065                         add_inode_reached(irec, offset);
3066                 }
3067         }
3068 }
3069
3070 static void
3071 check_for_orphaned_inodes(
3072         xfs_mount_t             *mp,
3073         xfs_agnumber_t          agno,
3074         ino_tree_node_t         *irec)
3075 {
3076         int                     i;
3077         xfs_ino_t               ino;
3078
3079         for (i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
3080                 ASSERT(is_inode_confirmed(irec, i));
3081                 if (is_inode_free(irec, i))
3082                         continue;
3083
3084                 if (is_inode_reached(irec, i))
3085                         continue;
3086
3087                 ASSERT(inode_isadir(irec, i) ||
3088                         num_inode_references(irec, i) == 0);
3089
3090                 ino = XFS_AGINO_TO_INO(mp, agno, i + irec->ino_startnum);
3091                 if (inode_isadir(irec, i))
3092                         do_warn(_("disconnected dir inode %" PRIu64 ", "), ino);
3093                 else
3094                         do_warn(_("disconnected inode %" PRIu64 ", "), ino);
3095                 if (!no_modify)  {
3096                         if (!orphanage_ino)
3097                                 orphanage_ino = mk_orphanage(mp);
3098                         do_warn(_("moving to %s\n"), ORPHANAGE);
3099                         mv_orphanage(mp, ino, inode_isadir(irec, i));
3100                 } else  {
3101                         do_warn(_("would move to %s\n"), ORPHANAGE);
3102                 }
3103                 /*
3104                  * for read-only case, even though the inode isn't
3105                  * really reachable, set the flag (and bump our link
3106                  * count) anyway to fool phase 7
3107                  */
3108                 add_inode_reached(irec, i);
3109         }
3110 }
3111
3112 static void
3113 do_dir_inode(
3114         struct workqueue        *wq,
3115         xfs_agnumber_t          agno,
3116         void                    *arg)
3117 {
3118         struct ino_tree_node    *irec = arg;
3119         int                     i;
3120
3121         for (i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
3122                 if (inode_isadir(irec, i))
3123                         process_dir_inode(wq->wq_ctx, agno, irec, i);
3124         }
3125 }
3126
3127 static void
3128 traverse_function(
3129         struct workqueue        *wq,
3130         xfs_agnumber_t          agno,
3131         void                    *arg)
3132 {
3133         struct ino_tree_node    *irec;
3134         prefetch_args_t         *pf_args = arg;
3135         struct workqueue        lwq;
3136         struct xfs_mount        *mp = wq->wq_ctx;
3137
3138         wait_for_inode_prefetch(pf_args);
3139
3140         if (verbose)
3141                 do_log(_("        - agno = %d\n"), agno);
3142
3143         /*
3144          * The more AGs we have in flight at once, the fewer processing threads
3145          * per AG. This means we don't overwhelm the machine with hundreds of
3146          * threads when we start acting on lots of AGs at once. We just want
3147          * enough that we can keep multiple CPUs busy across multiple AGs.
3148          */
3149         workqueue_create_bound(&lwq, mp, ag_stride, 1000);
3150
3151         for (irec = findfirst_inode_rec(agno); irec; irec = next_ino_rec(irec)) {
3152                 if (irec->ino_isa_dir == 0)
3153                         continue;
3154
3155                 if (pf_args) {
3156                         sem_post(&pf_args->ra_count);
3157 #ifdef XR_PF_TRACE
3158                         {
3159                         int     i;
3160                         sem_getvalue(&pf_args->ra_count, &i);
3161                         pftrace(
3162                 "processing inode chunk %p in AG %d (sem count = %d)",
3163                                 irec, agno, i);
3164                         }
3165 #endif
3166                 }
3167
3168                 queue_work(&lwq, do_dir_inode, agno, irec);
3169         }
3170         destroy_work_queue(&lwq);
3171         cleanup_inode_prefetch(pf_args);
3172 }
3173
3174 static void
3175 update_missing_dotdot_entries(
3176         xfs_mount_t             *mp)
3177 {
3178         dotdot_update_t         *dir;
3179
3180         /*
3181          * these entries parents were updated, rebuild them again
3182          * set dotdot_update flag so processing routines do not count links
3183          */
3184         dotdot_update = 1;
3185         while (!list_empty(&dotdot_update_list)) {
3186                 dir = list_entry(dotdot_update_list.prev, struct dotdot_update,
3187                                  list);
3188                 list_del(&dir->list);
3189                 process_dir_inode(mp, dir->agno, dir->irec, dir->ino_offset);
3190                 free(dir);
3191         }
3192 }
3193
3194 static void
3195 traverse_ags(
3196         struct xfs_mount        *mp)
3197 {
3198         do_inode_prefetch(mp, ag_stride, traverse_function, false, true);
3199 }
3200
3201 void
3202 phase6(xfs_mount_t *mp)
3203 {
3204         ino_tree_node_t         *irec;
3205         int                     i;
3206
3207         memset(&zerocr, 0, sizeof(struct cred));
3208         memset(&zerofsx, 0, sizeof(struct fsxattr));
3209         orphanage_ino = 0;
3210
3211         do_log(_("Phase 6 - check inode connectivity...\n"));
3212
3213         incore_ext_teardown(mp);
3214
3215         add_ino_ex_data(mp);
3216
3217         /*
3218          * verify existence of root directory - if we have to
3219          * make one, it's ok for the incore data structs not to
3220          * know about it since everything about it (and the other
3221          * inodes in its chunk if a new chunk was created) are ok
3222          */
3223         if (need_root_inode)  {
3224                 if (!no_modify)  {
3225                         do_warn(_("reinitializing root directory\n"));
3226                         mk_root_dir(mp);
3227                         need_root_inode = 0;
3228                         need_root_dotdot = 0;
3229                 } else  {
3230                         do_warn(_("would reinitialize root directory\n"));
3231                 }
3232         }
3233
3234         if (need_rbmino)  {
3235                 if (!no_modify)  {
3236                         do_warn(_("reinitializing realtime bitmap inode\n"));
3237                         mk_rbmino(mp);
3238                         need_rbmino = 0;
3239                 } else  {
3240                         do_warn(_("would reinitialize realtime bitmap inode\n"));
3241                 }
3242         }
3243
3244         if (need_rsumino)  {
3245                 if (!no_modify)  {
3246                         do_warn(_("reinitializing realtime summary inode\n"));
3247                         mk_rsumino(mp);
3248                         need_rsumino = 0;
3249                 } else  {
3250                         do_warn(_("would reinitialize realtime summary inode\n"));
3251                 }
3252         }
3253
3254         if (!no_modify)  {
3255                 do_log(
3256 _("        - resetting contents of realtime bitmap and summary inodes\n"));
3257                 if (fill_rbmino(mp))  {
3258                         do_warn(
3259                         _("Warning:  realtime bitmap may be inconsistent\n"));
3260                 }
3261
3262                 if (fill_rsumino(mp))  {
3263                         do_warn(
3264                         _("Warning:  realtime bitmap may be inconsistent\n"));
3265                 }
3266         }
3267
3268         mark_standalone_inodes(mp);
3269
3270         do_log(_("        - traversing filesystem ...\n"));
3271
3272         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
3273                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
3274
3275         /*
3276          * we always have a root inode, even if it's free...
3277          * if the root is free, forget it, lost+found is already gone
3278          */
3279         if (is_inode_free(irec, 0) || !inode_isadir(irec, 0))  {
3280                 need_root_inode = 1;
3281         }
3282
3283         /*
3284          * then process all inodes by walking incore inode tree
3285          */
3286         traverse_ags(mp);
3287
3288         /*
3289          * any directories that had updated ".." entries, rebuild them now
3290          */
3291         update_missing_dotdot_entries(mp);
3292
3293         do_log(_("        - traversal finished ...\n"));
3294         do_log(_("        - moving disconnected inodes to %s ...\n"),
3295                 ORPHANAGE);
3296
3297         /*
3298          * move all disconnected inodes to the orphanage
3299          */
3300         for (i = 0; i < glob_agcount; i++)  {
3301                 irec = findfirst_inode_rec(i);
3302                 while (irec != NULL)  {
3303                         check_for_orphaned_inodes(mp, i, irec);
3304                         irec = next_ino_rec(irec);
3305                 }
3306         }
3307 }