repair/phase6.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
   4  * All Rights Reserved.
   5  */
   6
   7 #include "libxfs.h"
   8 #include "threads.h"
   9 #include "threads.h"
  10 #include "prefetch.h"
  11 #include "avl.h"
  12 #include "globals.h"
  13 #include "agheader.h"
  14 #include "incore.h"
  15 #include "dir2.h"
  16 #include "protos.h"
  17 #include "err_protos.h"
  18 #include "dinode.h"
  19 #include "progress.h"
  20 #include "versions.h"
  21
  22 static struct cred              zerocr;
  23 static struct fsxattr           zerofsx;
  24 static xfs_ino_t                orphanage_ino;
  25
  26 static struct xfs_name          xfs_name_dot = {(unsigned char *)".",
  27                                                 1,
  28                                                 XFS_DIR3_FT_DIR};
  29
  30 /*
  31  * Data structures used to keep track of directories where the ".."
  32  * entries are updated. These must be rebuilt after the initial pass
  33  */
  34 typedef struct dotdot_update {
  35         struct list_head        list;
  36         ino_tree_node_t         *irec;
  37         xfs_agnumber_t          agno;
  38         int                     ino_offset;
  39 } dotdot_update_t;
  40
  41 static LIST_HEAD(dotdot_update_list);
  42 static int                      dotdot_update;
  43
  44 static void
  45 add_dotdot_update(
  46         xfs_agnumber_t          agno,
  47         ino_tree_node_t         *irec,
  48         int                     ino_offset)
  49 {
  50         dotdot_update_t         *dir = malloc(sizeof(dotdot_update_t));
  51
  52         if (!dir)
  53                 do_error(_("malloc failed add_dotdot_update (%zu bytes)\n"),
  54                         sizeof(dotdot_update_t));
  55
  56         INIT_LIST_HEAD(&dir->list);
  57         dir->irec = irec;
  58         dir->agno = agno;
  59         dir->ino_offset = ino_offset;
  60
  61         list_add(&dir->list, &dotdot_update_list);
  62 }
  63
  64 /*
  65  * Data structures and routines to keep track of directory entries
  66  * and whether their leaf entry has been seen. Also used for name
  67  * duplicate checking and rebuilding step if required.
  68  */
  69 struct dir_hash_ent {
  70         struct dir_hash_ent     *nextbyhash;    /* next in name bucket */
  71         struct dir_hash_ent     *nextbyorder;   /* next in order added */
  72         xfs_dahash_t            hashval;        /* hash value of name */
  73         uint32_t                address;        /* offset of data entry */
  74         xfs_ino_t               inum;           /* inode num of entry */
  75         short                   junkit;         /* name starts with / */
  76         short                   seen;           /* have seen leaf entry */
  77         struct xfs_name         name;
  78         unsigned char           namebuf[];
  79 };
  80
  81 struct dir_hash_tab {
  82         int                     size;           /* size of hash tables */
  83         struct dir_hash_ent     *first;         /* ptr to first added entry */
  84         struct dir_hash_ent     *last;          /* ptr to last added entry */
  85         struct dir_hash_ent     **byhash;       /* ptr to name hash buckets */
  86 #define HT_UNSEEN               1
  87         struct radix_tree_root  byaddr;
  88 };
  89
  90 #define DIR_HASH_TAB_SIZE(n)    \
  91         (sizeof(struct dir_hash_tab) + (sizeof(struct dir_hash_ent *) * (n)))
  92 #define DIR_HASH_FUNC(t,a)      ((a) % (t)->size)
  93
  94 /*
  95  * Track the contents of the freespace table in a directory.
  96  */
  97 typedef struct freetab {
  98         int                     naents; /* expected number of data blocks */
  99         int                     nents;  /* number of data blocks processed */
 100         struct freetab_ent {
 101                 xfs_dir2_data_off_t     v;
 102                 short                   s;
 103         } ents[1];
 104 } freetab_t;
 105 #define FREETAB_SIZE(n) \
 106         (offsetof(freetab_t, ents) + (sizeof(struct freetab_ent) * (n)))
 107
 108 #define DIR_HASH_CK_OK          0
 109 #define DIR_HASH_CK_DUPLEAF     1
 110 #define DIR_HASH_CK_BADHASH     2
 111 #define DIR_HASH_CK_NODATA      3
 112 #define DIR_HASH_CK_NOLEAF      4
 113 #define DIR_HASH_CK_BADSTALE    5
 114 #define DIR_HASH_CK_TOTAL       6
 115
 116 /*
 117  * Need to handle CRC and validation errors specially here. If there is a
 118  * validator error, re-read without the verifier so that we get a buffer we can
 119  * check and repair. Re-attach the ops to the buffer after the read so that when
 120  * it is rewritten the CRC is recalculated.
 121  *
 122  * If the buffer was not read, we return an error. If the buffer was read but
 123  * had a CRC or corruption error, we reread it without the verifier and if it is
 124  * read successfully we increment *crc_error and return 0. Otherwise we
 125  * return the read error.
 126  */
 127 static int
 128 dir_read_buf(
 129         struct xfs_inode        *ip,
 130         xfs_dablk_t             bno,
 131         struct xfs_buf          **bpp,
 132         const struct xfs_buf_ops *ops,
 133         int                     *crc_error)
 134 {
 135         int error;
 136         int error2;
 137
 138         error = -libxfs_da_read_buf(NULL, ip, bno, 0, bpp, XFS_DATA_FORK, ops);
 139
 140         if (error != EFSBADCRC && error != EFSCORRUPTED)
 141                 return error;
 142
 143         error2 = -libxfs_da_read_buf(NULL, ip, bno, 0, bpp, XFS_DATA_FORK,
 144                         NULL);
 145         if (error2)
 146                 return error2;
 147
 148         (*crc_error)++;
 149         (*bpp)->b_ops = ops;
 150         return 0;
 151 }
 152
 153 /*
 154  * Returns 0 if the name already exists (ie. a duplicate)
 155  */
 156 static int
 157 dir_hash_add(
 158         struct xfs_mount        *mp,
 159         struct dir_hash_tab     *hashtab,
 160         uint32_t                addr,
 161         xfs_ino_t               inum,
 162         int                     namelen,
 163         unsigned char           *name,
 164         uint8_t                 ftype)
 165 {
 166         xfs_dahash_t            hash = 0;
 167         int                     byhash = 0;
 168         struct dir_hash_ent     *p;
 169         int                     dup;
 170         short                   junk;
 171         struct xfs_name         xname;
 172         int                     error;
 173
 174         xname.name = name;
 175         xname.len = namelen;
 176         xname.type = ftype;
 177
 178         junk = name[0] == '/';
 179         dup = 0;
 180
 181         if (!junk) {
 182                 hash = libxfs_dir2_hashname(mp, &xname);
 183                 byhash = DIR_HASH_FUNC(hashtab, hash);
 184
 185                 /*
 186                  * search hash bucket for existing name.
 187                  */
 188                 for (p = hashtab->byhash[byhash]; p; p = p->nextbyhash) {
 189                         if (p->hashval == hash && p->name.len == namelen) {
 190                                 if (memcmp(p->name.name, name, namelen) == 0) {
 191                                         dup = 1;
 192                                         junk = 1;
 193                                         break;
 194                                 }
 195                         }
 196                 }
 197         }
 198
 199         /*
 200          * Allocate enough space for the hash entry and the name in a single
 201          * allocation so we can store our own copy of the name for later use.
 202          */
 203         p = calloc(1, sizeof(*p) + namelen + 1);
 204         if (!p)
 205                 do_error(_("malloc failed in dir_hash_add (%zu bytes)\n"),
 206                         sizeof(*p));
 207
 208         error = radix_tree_insert(&hashtab->byaddr, addr, p);
 209         if (error == EEXIST) {
 210                 do_warn(_("duplicate addrs %u in directory!\n"), addr);
 211                 free(p);
 212                 return 0;
 213         }
 214         radix_tree_tag_set(&hashtab->byaddr, addr, HT_UNSEEN);
 215
 216         if (hashtab->last)
 217                 hashtab->last->nextbyorder = p;
 218         else
 219                 hashtab->first = p;
 220         p->nextbyorder = NULL;
 221         hashtab->last = p;
 222
 223         if (!(p->junkit = junk)) {
 224                 p->hashval = hash;
 225                 p->nextbyhash = hashtab->byhash[byhash];
 226                 hashtab->byhash[byhash] = p;
 227         }
 228         p->address = addr;
 229         p->inum = inum;
 230         p->seen = 0;
 231
 232         /* Set up the name in the region trailing the hash entry. */
 233         memcpy(p->namebuf, name, namelen);
 234         p->name.name = p->namebuf;
 235         p->name.len = namelen;
 236         p->name.type = ftype;
 237         return !dup;
 238 }
 239
 240 /* Mark an existing directory hashtable entry as junk. */
 241 static void
 242 dir_hash_junkit(
 243         struct dir_hash_tab     *hashtab,
 244         xfs_dir2_dataptr_t      addr)
 245 {
 246         struct dir_hash_ent     *p;
 247
 248         p = radix_tree_lookup(&hashtab->byaddr, addr);
 249         assert(p != NULL);
 250
 251         p->junkit = 1;
 252         p->namebuf[0] = '/';
 253 }
 254
 255 static int
 256 dir_hash_check(
 257         struct dir_hash_tab     *hashtab,
 258         struct xfs_inode        *ip,
 259         int                     seeval)
 260 {
 261         static char             *seevalstr[DIR_HASH_CK_TOTAL];
 262         static int              done;
 263
 264         if (!done) {
 265                 seevalstr[DIR_HASH_CK_OK] = _("ok");
 266                 seevalstr[DIR_HASH_CK_DUPLEAF] = _("duplicate leaf");
 267                 seevalstr[DIR_HASH_CK_BADHASH] = _("hash value mismatch");
 268                 seevalstr[DIR_HASH_CK_NODATA] = _("no data entry");
 269                 seevalstr[DIR_HASH_CK_NOLEAF] = _("no leaf entry");
 270                 seevalstr[DIR_HASH_CK_BADSTALE] = _("bad stale count");
 271                 done = 1;
 272         }
 273
 274         if (seeval == DIR_HASH_CK_OK &&
 275             radix_tree_tagged(&hashtab->byaddr, HT_UNSEEN))
 276                 seeval = DIR_HASH_CK_NOLEAF;
 277         if (seeval == DIR_HASH_CK_OK)
 278                 return 0;
 279         do_warn(_("bad hash table for directory inode %" PRIu64 " (%s): "),
 280                 ip->i_ino, seevalstr[seeval]);
 281         if (!no_modify)
 282                 do_warn(_("rebuilding\n"));
 283         else
 284                 do_warn(_("would rebuild\n"));
 285         return 1;
 286 }
 287
 288 static void
 289 dir_hash_done(
 290         struct dir_hash_tab     *hashtab)
 291 {
 292         int                     i;
 293         struct dir_hash_ent     *n;
 294         struct dir_hash_ent     *p;
 295
 296         for (i = 0; i < hashtab->size; i++) {
 297                 for (p = hashtab->byhash[i]; p; p = n) {
 298                         n = p->nextbyhash;
 299                         radix_tree_delete(&hashtab->byaddr, p->address);
 300                         free(p);
 301                 }
 302         }
 303         free(hashtab);
 304 }
 305
 306 /*
 307  * Create a directory hash index structure based on the size of the directory we
 308  * are about to try to repair. The size passed in is the size of the data
 309  * segment of the directory in bytes, so we don't really know exactly how many
 310  * entries are in it. Hence assume an entry size of around 64 bytes - that's a
 311  * name length of 40+ bytes so should cover a most situations with really large
 312  * directories.
 313  */
 314 static struct dir_hash_tab *
 315 dir_hash_init(
 316         xfs_fsize_t             size)
 317 {
 318         struct dir_hash_tab     *hashtab = NULL;
 319         int                     hsize;
 320
 321         hsize = size / 64;
 322         if (hsize < 16)
 323                 hsize = 16;
 324
 325         /*
 326          * Try to allocate as large a hash table as possible. Failure to
 327          * allocate isn't fatal, it will just result in slower performance as we
 328          * reduce the size of the table.
 329          */
 330         while (hsize >= 16) {
 331                 hashtab = calloc(DIR_HASH_TAB_SIZE(hsize), 1);
 332                 if (hashtab)
 333                         break;
 334                 hsize /= 2;
 335         }
 336         if (!hashtab)
 337                 do_error(_("calloc failed in dir_hash_init\n"));
 338         hashtab->size = hsize;
 339         hashtab->byhash = (struct dir_hash_ent **)((char *)hashtab +
 340                 sizeof(struct dir_hash_tab));
 341         INIT_RADIX_TREE(&hashtab->byaddr, 0);
 342         return hashtab;
 343 }
 344
 345 static int
 346 dir_hash_see(
 347         struct dir_hash_tab     *hashtab,
 348         xfs_dahash_t            hash,
 349         xfs_dir2_dataptr_t      addr)
 350 {
 351         struct dir_hash_ent     *p;
 352
 353         p = radix_tree_lookup(&hashtab->byaddr, addr);
 354         if (!p)
 355                 return DIR_HASH_CK_NODATA;
 356         if (!radix_tree_tag_get(&hashtab->byaddr, addr, HT_UNSEEN))
 357                 return DIR_HASH_CK_DUPLEAF;
 358         if (p->junkit == 0 && p->hashval != hash)
 359                 return DIR_HASH_CK_BADHASH;
 360         radix_tree_tag_clear(&hashtab->byaddr, addr, HT_UNSEEN);
 361         return DIR_HASH_CK_OK;
 362 }
 363
 364 static void
 365 dir_hash_update_ftype(
 366         struct dir_hash_tab     *hashtab,
 367         xfs_dir2_dataptr_t      addr,
 368         uint8_t                 ftype)
 369 {
 370         struct dir_hash_ent     *p;
 371
 372         p = radix_tree_lookup(&hashtab->byaddr, addr);
 373         if (!p)
 374                 return;
 375         p->name.type = ftype;
 376 }
 377
 378 /*
 379  * checks to make sure leafs match a data entry, and that the stale
 380  * count is valid.
 381  */
 382 static int
 383 dir_hash_see_all(
 384         struct dir_hash_tab     *hashtab,
 385         xfs_dir2_leaf_entry_t   *ents,
 386         int                     count,
 387         int                     stale)
 388 {
 389         int                     i;
 390         int                     j;
 391         int                     rval;
 392
 393         for (i = j = 0; i < count; i++) {
 394                 if (be32_to_cpu(ents[i].address) == XFS_DIR2_NULL_DATAPTR) {
 395                         j++;
 396                         continue;
 397                 }
 398                 rval = dir_hash_see(hashtab, be32_to_cpu(ents[i].hashval),
 399                                         be32_to_cpu(ents[i].address));
 400                 if (rval != DIR_HASH_CK_OK)
 401                         return rval;
 402         }
 403         return j == stale ? DIR_HASH_CK_OK : DIR_HASH_CK_BADSTALE;
 404 }
 405
 406 /*
 407  * Given a block number in a fork, return the next valid block number (not a
 408  * hole).  If this is the last block number then NULLFILEOFF is returned.
 409  */
 410 static int
 411 bmap_next_offset(
 412         struct xfs_inode        *ip,
 413         xfs_fileoff_t           *bnop)
 414 {
 415         xfs_fileoff_t           bno;
 416         int                     error;
 417         struct xfs_bmbt_irec    got;
 418         struct xfs_iext_cursor  icur;
 419
 420         switch (ip->i_df.if_format) {
 421         case XFS_DINODE_FMT_LOCAL:
 422                 *bnop = NULLFILEOFF;
 423                 return 0;
 424         case XFS_DINODE_FMT_BTREE:
 425         case XFS_DINODE_FMT_EXTENTS:
 426                 break;
 427         default:
 428                 return EIO;
 429         }
 430
 431         /* Read extent map. */
 432         error = -libxfs_iread_extents(NULL, ip, XFS_DATA_FORK);
 433         if (error)
 434                 return error;
 435
 436         bno = *bnop + 1;
 437         if (!libxfs_iext_lookup_extent(ip, &ip->i_df, bno, &icur, &got))
 438                 *bnop = NULLFILEOFF;
 439         else
 440                 *bnop = got.br_startoff < bno ? bno : got.br_startoff;
 441         return 0;
 442 }
 443
 444 static void
 445 res_failed(
 446         int     err)
 447 {
 448         if (err == ENOSPC) {
 449                 do_error(_("ran out of disk space!\n"));
 450         } else
 451                 do_error(_("xfs_trans_reserve returned %d\n"), err);
 452 }
 453
 454 static inline void
 455 reset_inode_fields(struct xfs_inode *ip)
 456 {
 457         ip->i_projid = 0;
 458         ip->i_disk_size = 0;
 459         ip->i_nblocks = 0;
 460         ip->i_extsize = 0;
 461         ip->i_cowextsize = 0;
 462         ip->i_flushiter = 0;
 463         ip->i_forkoff = 0;
 464         ip->i_diflags = 0;
 465         ip->i_diflags2 = 0;
 466         ip->i_crtime.tv_sec = 0;
 467         ip->i_crtime.tv_nsec = 0;
 468 }
 469
 470 static void
 471 mk_rbmino(xfs_mount_t *mp)
 472 {
 473         xfs_trans_t     *tp;
 474         xfs_inode_t     *ip;
 475         xfs_bmbt_irec_t *ep;
 476         int             i;
 477         int             nmap;
 478         int             error;
 479         xfs_fileoff_t   bno;
 480         xfs_bmbt_irec_t map[XFS_BMAP_MAX_NMAP];
 481         int             times;
 482         uint            blocks;
 483
 484         /*
 485          * first set up inode
 486          */
 487         i = -libxfs_trans_alloc_rollable(mp, 10, &tp);
 488         if (i)
 489                 res_failed(i);
 490
 491         error = -libxfs_iget(mp, tp, mp->m_sb.sb_rbmino, 0, &ip);
 492         if (error) {
 493                 do_error(
 494                 _("couldn't iget realtime bitmap inode -- error - %d\n"),
 495                         error);
 496         }
 497
 498         reset_inode_fields(ip);
 499
 500         VFS_I(ip)->i_mode = S_IFREG;
 501         ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
 502         if (ip->i_afp)
 503                 ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
 504
 505         set_nlink(VFS_I(ip), 1);        /* account for sb ptr */
 506
 507         times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
 508         if (xfs_has_v3inodes(mp)) {
 509                 VFS_I(ip)->i_version = 1;
 510                 ip->i_diflags2 = 0;
 511                 times |= XFS_ICHGTIME_CREATE;
 512         }
 513         libxfs_trans_ichgtime(tp, ip, times);
 514
 515         /*
 516          * now the ifork
 517          */
 518         ip->i_df.if_bytes = 0;
 519         ip->i_df.if_u1.if_root = NULL;
 520
 521         ip->i_disk_size = mp->m_sb.sb_rbmblocks * mp->m_sb.sb_blocksize;
 522
 523         /*
 524          * commit changes
 525          */
 526         libxfs_trans_ijoin(tp, ip, 0);
 527         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 528         error = -libxfs_trans_commit(tp);
 529         if (error)
 530                 do_error(_("%s: commit failed, error %d\n"), __func__, error);
 531
 532         /*
 533          * then allocate blocks for file and fill with zeroes (stolen
 534          * from mkfs)
 535          */
 536         blocks = mp->m_sb.sb_rbmblocks +
 537                         XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1;
 538         error = -libxfs_trans_alloc_rollable(mp, blocks, &tp);
 539         if (error)
 540                 res_failed(error);
 541
 542         libxfs_trans_ijoin(tp, ip, 0);
 543         bno = 0;
 544         while (bno < mp->m_sb.sb_rbmblocks) {
 545                 nmap = XFS_BMAP_MAX_NMAP;
 546                 error = -libxfs_bmapi_write(tp, ip, bno,
 547                           (xfs_extlen_t)(mp->m_sb.sb_rbmblocks - bno),
 548                           0, mp->m_sb.sb_rbmblocks, map, &nmap);
 549                 if (error) {
 550                         do_error(
 551                         _("couldn't allocate realtime bitmap, error = %d\n"),
 552                                 error);
 553                 }
 554                 for (i = 0, ep = map; i < nmap; i++, ep++) {
 555                         libxfs_device_zero(mp->m_ddev_targp,
 556                                 XFS_FSB_TO_DADDR(mp, ep->br_startblock),
 557                                 XFS_FSB_TO_BB(mp, ep->br_blockcount));
 558                         bno += ep->br_blockcount;
 559                 }
 560         }
 561         error = -libxfs_trans_commit(tp);
 562         if (error) {
 563                 do_error(
 564                 _("allocation of the realtime bitmap failed, error = %d\n"),
 565                         error);
 566         }
 567         libxfs_irele(ip);
 568 }
 569
 570 static int
 571 fill_rbmino(xfs_mount_t *mp)
 572 {
 573         struct xfs_buf  *bp;
 574         xfs_trans_t     *tp;
 575         xfs_inode_t     *ip;
 576         xfs_rtword_t    *bmp;
 577         int             nmap;
 578         int             error;
 579         xfs_fileoff_t   bno;
 580         xfs_bmbt_irec_t map;
 581
 582         bmp = btmcompute;
 583         bno = 0;
 584
 585         error = -libxfs_trans_alloc_rollable(mp, 10, &tp);
 586         if (error)
 587                 res_failed(error);
 588
 589         error = -libxfs_iget(mp, tp, mp->m_sb.sb_rbmino, 0, &ip);
 590         if (error) {
 591                 do_error(
 592                 _("couldn't iget realtime bitmap inode -- error - %d\n"),
 593                         error);
 594         }
 595
 596         while (bno < mp->m_sb.sb_rbmblocks)  {
 597                 /*
 598                  * fill the file one block at a time
 599                  */
 600                 nmap = 1;
 601                 error = -libxfs_bmapi_write(tp, ip, bno, 1, 0, 1, &map, &nmap);
 602                 if (error || nmap != 1) {
 603                         do_error(
 604         _("couldn't map realtime bitmap block %" PRIu64 ", error = %d\n"),
 605                                 bno, error);
 606                 }
 607
 608                 ASSERT(map.br_startblock != HOLESTARTBLOCK);
 609
 610                 error = -libxfs_trans_read_buf(
 611                                 mp, tp, mp->m_dev,
 612                                 XFS_FSB_TO_DADDR(mp, map.br_startblock),
 613                                 XFS_FSB_TO_BB(mp, 1), 1, &bp, NULL);
 614
 615                 if (error) {
 616                         do_warn(
 617 _("can't access block %" PRIu64 " (fsbno %" PRIu64 ") of realtime bitmap inode %" PRIu64 "\n"),
 618                                 bno, map.br_startblock, mp->m_sb.sb_rbmino);
 619                         return(1);
 620                 }
 621
 622                 memmove(bp->b_addr, bmp, mp->m_sb.sb_blocksize);
 623
 624                 libxfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
 625
 626                 bmp = (xfs_rtword_t *)((intptr_t) bmp + mp->m_sb.sb_blocksize);
 627                 bno++;
 628         }
 629
 630         libxfs_trans_ijoin(tp, ip, 0);
 631         error = -libxfs_trans_commit(tp);
 632         if (error)
 633                 do_error(_("%s: commit failed, error %d\n"), __func__, error);
 634         libxfs_irele(ip);
 635         return(0);
 636 }
 637
 638 static int
 639 fill_rsumino(xfs_mount_t *mp)
 640 {
 641         struct xfs_buf  *bp;
 642         xfs_trans_t     *tp;
 643         xfs_inode_t     *ip;
 644         xfs_suminfo_t   *smp;
 645         int             nmap;
 646         int             error;
 647         xfs_fileoff_t   bno;
 648         xfs_fileoff_t   end_bno;
 649         xfs_bmbt_irec_t map;
 650
 651         smp = sumcompute;
 652         bno = 0;
 653         end_bno = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
 654
 655         error = -libxfs_trans_alloc_rollable(mp, 10, &tp);
 656         if (error)
 657                 res_failed(error);
 658
 659         error = -libxfs_iget(mp, tp, mp->m_sb.sb_rsumino, 0, &ip);
 660         if (error) {
 661                 do_error(
 662                 _("couldn't iget realtime summary inode -- error - %d\n"),
 663                         error);
 664         }
 665
 666         while (bno < end_bno)  {
 667                 /*
 668                  * fill the file one block at a time
 669                  */
 670                 nmap = 1;
 671                 error = -libxfs_bmapi_write(tp, ip, bno, 1, 0, 1, &map, &nmap);
 672                 if (error || nmap != 1) {
 673                         do_error(
 674         _("couldn't map realtime summary inode block %" PRIu64 ", error = %d\n"),
 675                                 bno, error);
 676                 }
 677
 678                 ASSERT(map.br_startblock != HOLESTARTBLOCK);
 679
 680                 error = -libxfs_trans_read_buf(
 681                                 mp, tp, mp->m_dev,
 682                                 XFS_FSB_TO_DADDR(mp, map.br_startblock),
 683                                 XFS_FSB_TO_BB(mp, 1), 1, &bp, NULL);
 684
 685                 if (error) {
 686                         do_warn(
 687 _("can't access block %" PRIu64 " (fsbno %" PRIu64 ") of realtime summary inode %" PRIu64 "\n"),
 688                                 bno, map.br_startblock, mp->m_sb.sb_rsumino);
 689                         libxfs_irele(ip);
 690                         return(1);
 691                 }
 692
 693                 memmove(bp->b_addr, smp, mp->m_sb.sb_blocksize);
 694
 695                 libxfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
 696
 697                 smp = (xfs_suminfo_t *)((intptr_t)smp + mp->m_sb.sb_blocksize);
 698                 bno++;
 699         }
 700
 701         libxfs_trans_ijoin(tp, ip, 0);
 702         error = -libxfs_trans_commit(tp);
 703         if (error)
 704                 do_error(_("%s: commit failed, error %d\n"), __func__, error);
 705         libxfs_irele(ip);
 706         return(0);
 707 }
 708
 709 static void
 710 mk_rsumino(xfs_mount_t *mp)
 711 {
 712         xfs_trans_t     *tp;
 713         xfs_inode_t     *ip;
 714         xfs_bmbt_irec_t *ep;
 715         int             i;
 716         int             nmap;
 717         int             error;
 718         int             nsumblocks;
 719         xfs_fileoff_t   bno;
 720         xfs_bmbt_irec_t map[XFS_BMAP_MAX_NMAP];
 721         int             times;
 722         uint            blocks;
 723
 724         /*
 725          * first set up inode
 726          */
 727         i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 10, 0, 0, &tp);
 728         if (i)
 729                 res_failed(i);
 730
 731         error = -libxfs_iget(mp, tp, mp->m_sb.sb_rsumino, 0, &ip);
 732         if (error) {
 733                 do_error(
 734                 _("couldn't iget realtime summary inode -- error - %d\n"),
 735                         error);
 736         }
 737
 738         reset_inode_fields(ip);
 739
 740         VFS_I(ip)->i_mode = S_IFREG;
 741         ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
 742         if (ip->i_afp)
 743                 ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
 744
 745         set_nlink(VFS_I(ip), 1);        /* account for sb ptr */
 746
 747         times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
 748         if (xfs_has_v3inodes(mp)) {
 749                 VFS_I(ip)->i_version = 1;
 750                 ip->i_diflags2 = 0;
 751                 times |= XFS_ICHGTIME_CREATE;
 752         }
 753         libxfs_trans_ichgtime(tp, ip, times);
 754
 755         /*
 756          * now the ifork
 757          */
 758         ip->i_df.if_bytes = 0;
 759         ip->i_df.if_u1.if_root = NULL;
 760
 761         ip->i_disk_size = mp->m_rsumsize;
 762
 763         /*
 764          * commit changes
 765          */
 766         libxfs_trans_ijoin(tp, ip, 0);
 767         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 768         error = -libxfs_trans_commit(tp);
 769         if (error)
 770                 do_error(_("%s: commit failed, error %d\n"), __func__, error);
 771
 772         /*
 773          * then allocate blocks for file and fill with zeroes (stolen
 774          * from mkfs)
 775          */
 776         nsumblocks = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
 777         blocks = nsumblocks + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1;
 778         error = -libxfs_trans_alloc_rollable(mp, blocks, &tp);
 779         if (error)
 780                 res_failed(error);
 781
 782         libxfs_trans_ijoin(tp, ip, 0);
 783         bno = 0;
 784         while (bno < nsumblocks) {
 785                 nmap = XFS_BMAP_MAX_NMAP;
 786                 error = -libxfs_bmapi_write(tp, ip, bno,
 787                           (xfs_extlen_t)(nsumblocks - bno),
 788                           0, nsumblocks, map, &nmap);
 789                 if (error) {
 790                         do_error(
 791                 _("couldn't allocate realtime summary inode, error = %d\n"),
 792                                 error);
 793                 }
 794                 for (i = 0, ep = map; i < nmap; i++, ep++) {
 795                         libxfs_device_zero(mp->m_ddev_targp,
 796                                       XFS_FSB_TO_DADDR(mp, ep->br_startblock),
 797                                       XFS_FSB_TO_BB(mp, ep->br_blockcount));
 798                         bno += ep->br_blockcount;
 799                 }
 800         }
 801         error = -libxfs_trans_commit(tp);
 802         if (error) {
 803                 do_error(
 804         _("allocation of the realtime summary ino failed, error = %d\n"),
 805                         error);
 806         }
 807         libxfs_irele(ip);
 808 }
 809
 810 /*
 811  * makes a new root directory.
 812  */
 813 static void
 814 mk_root_dir(xfs_mount_t *mp)
 815 {
 816         xfs_trans_t     *tp;
 817         xfs_inode_t     *ip;
 818         int             i;
 819         int             error;
 820         const mode_t    mode = 0755;
 821         ino_tree_node_t *irec;
 822         int             times;
 823
 824         ip = NULL;
 825         i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 10, 0, 0, &tp);
 826         if (i)
 827                 res_failed(i);
 828
 829         error = -libxfs_iget(mp, tp, mp->m_sb.sb_rootino, 0, &ip);
 830         if (error) {
 831                 do_error(_("could not iget root inode -- error - %d\n"), error);
 832         }
 833
 834         /*
 835          * take care of the core -- initialization from xfs_ialloc()
 836          */
 837         reset_inode_fields(ip);
 838
 839         VFS_I(ip)->i_mode = mode|S_IFDIR;
 840         ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
 841         if (ip->i_afp)
 842                 ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
 843
 844         set_nlink(VFS_I(ip), 2);        /* account for . and .. */
 845
 846         times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
 847         if (xfs_has_v3inodes(mp)) {
 848                 VFS_I(ip)->i_version = 1;
 849                 ip->i_diflags2 = 0;
 850                 times |= XFS_ICHGTIME_CREATE;
 851         }
 852         libxfs_trans_ichgtime(tp, ip, times);
 853         libxfs_trans_ijoin(tp, ip, 0);
 854         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 855
 856         /*
 857          * now the ifork
 858          */
 859         ip->i_df.if_bytes = 0;
 860         ip->i_df.if_u1.if_root = NULL;
 861
 862         /*
 863          * initialize the directory
 864          */
 865         libxfs_dir_init(tp, ip, ip);
 866
 867         error = -libxfs_trans_commit(tp);
 868         if (error)
 869                 do_error(_("%s: commit failed, error %d\n"), __func__, error);
 870
 871         libxfs_irele(ip);
 872
 873         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
 874                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
 875         set_inode_isadir(irec, XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino) -
 876                                 irec->ino_startnum);
 877 }
 878
 879 /*
 880  * orphanage name == lost+found
 881  */
 882 static xfs_ino_t
 883 mk_orphanage(xfs_mount_t *mp)
 884 {
 885         xfs_ino_t       ino;
 886         xfs_trans_t     *tp;
 887         xfs_inode_t     *ip;
 888         xfs_inode_t     *pip;
 889         ino_tree_node_t *irec;
 890         int             ino_offset = 0;
 891         int             i;
 892         int             error;
 893         const int       mode = 0755;
 894         int             nres;
 895         struct xfs_name xname;
 896
 897         /*
 898          * check for an existing lost+found first, if it exists, return
 899          * its inode. Otherwise, we can create it. Bad lost+found inodes
 900          * would have been cleared in phase3 and phase4.
 901          */
 902
 903         i = -libxfs_iget(mp, NULL, mp->m_sb.sb_rootino, 0, &pip);
 904         if (i)
 905                 do_error(_("%d - couldn't iget root inode to obtain %s\n"),
 906                         i, ORPHANAGE);
 907
 908         xname.name = (unsigned char *)ORPHANAGE;
 909         xname.len = strlen(ORPHANAGE);
 910         xname.type = XFS_DIR3_FT_DIR;
 911
 912         if (libxfs_dir_lookup(NULL, pip, &xname, &ino, NULL) == 0)
 913                 return ino;
 914
 915         /*
 916          * could not be found, create it
 917          */
 918         nres = XFS_MKDIR_SPACE_RES(mp, xname.len);
 919         i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir, nres, 0, 0, &tp);
 920         if (i)
 921                 res_failed(i);
 922
 923         /*
 924          * use iget/ijoin instead of trans_iget because the ialloc
 925          * wrapper can commit the transaction and start a new one
 926          */
 927 /*      i = -libxfs_iget(mp, NULL, mp->m_sb.sb_rootino, 0, &pip);
 928         if (i)
 929                 do_error(_("%d - couldn't iget root inode to make %s\n"),
 930                         i, ORPHANAGE);*/
 931
 932         error = -libxfs_dir_ialloc(&tp, pip, mode|S_IFDIR,
 933                                         1, 0, &zerocr, &zerofsx, &ip);
 934         if (error) {
 935                 do_error(_("%s inode allocation failed %d\n"),
 936                         ORPHANAGE, error);
 937         }
 938         inc_nlink(VFS_I(ip));           /* account for . */
 939         ino = ip->i_ino;
 940
 941         irec = find_inode_rec(mp,
 942                         XFS_INO_TO_AGNO(mp, ino),
 943                         XFS_INO_TO_AGINO(mp, ino));
 944
 945         if (irec == NULL) {
 946                 /*
 947                  * This inode is allocated from a newly created inode
 948                  * chunk and therefore did not exist when inode chunks
 949                  * were processed in phase3. Add this group of inodes to
 950                  * the entry avl tree as if they were discovered in phase3.
 951                  */
 952                 irec = set_inode_free_alloc(mp, XFS_INO_TO_AGNO(mp, ino),
 953                                             XFS_INO_TO_AGINO(mp, ino));
 954                 alloc_ex_data(irec);
 955
 956                 for (i = 0; i < XFS_INODES_PER_CHUNK; i++)
 957                         set_inode_free(irec, i);
 958         }
 959
 960         ino_offset = get_inode_offset(mp, ino, irec);
 961
 962         /*
 963          * Mark the inode allocated to lost+found as used in the AVL tree
 964          * so it is not skipped in phase 7
 965          */
 966         set_inode_used(irec, ino_offset);
 967         add_inode_ref(irec, ino_offset);
 968         add_inode_reached(irec, ino_offset);
 969
 970         /*
 971          * now that we know the transaction will stay around,
 972          * add the root inode to it
 973          */
 974         libxfs_trans_ijoin(tp, pip, 0);
 975
 976         /*
 977          * create the actual entry
 978          */
 979         error = -libxfs_dir_createname(tp, pip, &xname, ip->i_ino, nres);
 980         if (error)
 981                 do_error(
 982                 _("can't make %s, createname error %d\n"),
 983                         ORPHANAGE, error);
 984
 985         /*
 986          * bump up the link count in the root directory to account
 987          * for .. in the new directory, and update the irec copy of the
 988          * on-disk nlink so we don't fail the link count check later.
 989          */
 990         inc_nlink(VFS_I(pip));
 991         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
 992                                   XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
 993         add_inode_ref(irec, 0);
 994         set_inode_disk_nlinks(irec, 0, get_inode_disk_nlinks(irec, 0) + 1);
 995
 996         libxfs_trans_log_inode(tp, pip, XFS_ILOG_CORE);
 997         libxfs_dir_init(tp, ip, pip);
 998         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 999         error = -libxfs_trans_commit(tp);
1000         if (error) {
1001                 do_error(_("%s directory creation failed -- bmapf error %d\n"),
1002                         ORPHANAGE, error);
1003         }
1004         libxfs_irele(ip);
1005         libxfs_irele(pip);
1006
1007         return(ino);
1008 }
1009
1010 /*
1011  * move a file to the orphange.
1012  */
1013 static void
1014 mv_orphanage(
1015         xfs_mount_t             *mp,
1016         xfs_ino_t               ino,            /* inode # to be moved */
1017         int                     isa_dir)        /* 1 if inode is a directory */
1018 {
1019         xfs_inode_t             *orphanage_ip;
1020         xfs_ino_t               entry_ino_num;
1021         xfs_inode_t             *ino_p;
1022         xfs_trans_t             *tp;
1023         int                     err;
1024         unsigned char           fname[MAXPATHLEN + 1];
1025         int                     nres;
1026         int                     incr;
1027         ino_tree_node_t         *irec;
1028         int                     ino_offset = 0;
1029         struct xfs_name         xname;
1030
1031         xname.name = fname;
1032         xname.len = snprintf((char *)fname, sizeof(fname), "%llu",
1033                                 (unsigned long long)ino);
1034
1035         err = -libxfs_iget(mp, NULL, orphanage_ino, 0, &orphanage_ip);
1036         if (err)
1037                 do_error(_("%d - couldn't iget orphanage inode\n"), err);
1038         /*
1039          * Make sure the filename is unique in the lost+found
1040          */
1041         incr = 0;
1042         while (libxfs_dir_lookup(NULL, orphanage_ip, &xname, &entry_ino_num,
1043                                                                 NULL) == 0)
1044                 xname.len = snprintf((char *)fname, sizeof(fname), "%llu.%d",
1045                                         (unsigned long long)ino, ++incr);
1046
1047         /* Orphans may not have a proper parent, so use custom ops here */
1048         err = -libxfs_iget(mp, NULL, ino, 0, &ino_p);
1049         if (err)
1050                 do_error(_("%d - couldn't iget disconnected inode\n"), err);
1051
1052         xname.type = libxfs_mode_to_ftype(VFS_I(ino_p)->i_mode);
1053
1054         if (isa_dir)  {
1055                 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, orphanage_ino),
1056                                 XFS_INO_TO_AGINO(mp, orphanage_ino));
1057                 if (irec)
1058                         ino_offset = XFS_INO_TO_AGINO(mp, orphanage_ino) -
1059                                         irec->ino_startnum;
1060                 nres = XFS_DIRENTER_SPACE_RES(mp, fnamelen) +
1061                        XFS_DIRENTER_SPACE_RES(mp, 2);
1062                 err = -libxfs_dir_lookup(NULL, ino_p, &xfs_name_dotdot,
1063                                         &entry_ino_num, NULL);
1064                 if (err) {
1065                         ASSERT(err == ENOENT);
1066
1067                         err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_rename,
1068                                                   nres, 0, 0, &tp);
1069                         if (err)
1070                                 do_error(
1071         _("space reservation failed (%d), filesystem may be out of space\n"),
1072                                         err);
1073
1074                         libxfs_trans_ijoin(tp, orphanage_ip, 0);
1075                         libxfs_trans_ijoin(tp, ino_p, 0);
1076
1077                         err = -libxfs_dir_createname(tp, orphanage_ip, &xname,
1078                                                 ino, nres);
1079                         if (err)
1080                                 do_error(
1081         _("name create failed in %s (%d), filesystem may be out of space\n"),
1082                                         ORPHANAGE, err);
1083
1084                         if (irec)
1085                                 add_inode_ref(irec, ino_offset);
1086                         else
1087                                 inc_nlink(VFS_I(orphanage_ip));
1088                         libxfs_trans_log_inode(tp, orphanage_ip, XFS_ILOG_CORE);
1089
1090                         err = -libxfs_dir_createname(tp, ino_p, &xfs_name_dotdot,
1091                                         orphanage_ino, nres);
1092                         if (err)
1093                                 do_error(
1094         _("creation of .. entry failed (%d), filesystem may be out of space\n"),
1095                                         err);
1096
1097                         inc_nlink(VFS_I(ino_p));
1098                         libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE);
1099                         err = -libxfs_trans_commit(tp);
1100                         if (err)
1101                                 do_error(
1102         _("creation of .. entry failed (%d)\n"), err);
1103                 } else  {
1104                         err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_rename,
1105                                                   nres, 0, 0, &tp);
1106                         if (err)
1107                                 do_error(
1108         _("space reservation failed (%d), filesystem may be out of space\n"),
1109                                         err);
1110
1111                         libxfs_trans_ijoin(tp, orphanage_ip, 0);
1112                         libxfs_trans_ijoin(tp, ino_p, 0);
1113
1114
1115                         err = -libxfs_dir_createname(tp, orphanage_ip, &xname,
1116                                                 ino, nres);
1117                         if (err)
1118                                 do_error(
1119         _("name create failed in %s (%d), filesystem may be out of space\n"),
1120                                         ORPHANAGE, err);
1121
1122                         if (irec)
1123                                 add_inode_ref(irec, ino_offset);
1124                         else
1125                                 inc_nlink(VFS_I(orphanage_ip));
1126                         libxfs_trans_log_inode(tp, orphanage_ip, XFS_ILOG_CORE);
1127
1128                         /*
1129                          * don't replace .. value if it already points
1130                          * to us.  that'll pop a libxfs/kernel ASSERT.
1131                          */
1132                         if (entry_ino_num != orphanage_ino)  {
1133                                 err = -libxfs_dir_replace(tp, ino_p,
1134                                                 &xfs_name_dotdot, orphanage_ino,
1135                                                 nres);
1136                                 if (err)
1137                                         do_error(
1138         _("name replace op failed (%d), filesystem may be out of space\n"),
1139                                                 err);
1140                         }
1141
1142                         err = -libxfs_trans_commit(tp);
1143                         if (err)
1144                                 do_error(
1145         _("orphanage name replace op failed (%d)\n"), err);
1146                 }
1147
1148         } else  {
1149                 /*
1150                  * use the remove log reservation as that's
1151                  * more accurate.  we're only creating the
1152                  * links, we're not doing the inode allocation
1153                  * also accounted for in the create
1154                  */
1155                 nres = XFS_DIRENTER_SPACE_RES(mp, xname.len);
1156                 err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove,
1157                                           nres, 0, 0, &tp);
1158                 if (err)
1159                         do_error(
1160         _("space reservation failed (%d), filesystem may be out of space\n"),
1161                                 err);
1162
1163                 libxfs_trans_ijoin(tp, orphanage_ip, 0);
1164                 libxfs_trans_ijoin(tp, ino_p, 0);
1165
1166                 err = -libxfs_dir_createname(tp, orphanage_ip, &xname, ino,
1167                                                 nres);
1168                 if (err)
1169                         do_error(
1170         _("name create failed in %s (%d), filesystem may be out of space\n"),
1171                                 ORPHANAGE, err);
1172                 ASSERT(err == 0);
1173
1174                 set_nlink(VFS_I(ino_p), 1);
1175                 libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE);
1176                 err = -libxfs_trans_commit(tp);
1177                 if (err)
1178                         do_error(
1179         _("orphanage name create failed (%d)\n"), err);
1180         }
1181         libxfs_irele(ino_p);
1182         libxfs_irele(orphanage_ip);
1183 }
1184
1185 static int
1186 entry_junked(
1187         const char      *msg,
1188         const char      *iname,
1189         xfs_ino_t       ino1,
1190         xfs_ino_t       ino2)
1191 {
1192         do_warn(msg, iname, ino1, ino2);
1193         if (!no_modify) {
1194                 if (verbose)
1195                         do_warn(_(", marking entry to be junked\n"));
1196                 else
1197                         do_warn("\n");
1198         } else
1199                 do_warn(_(", would junk entry\n"));
1200         return !no_modify;
1201 }
1202
1203 /* Find and invalidate all the directory's buffers. */
1204 static int
1205 dir_binval(
1206         struct xfs_trans        *tp,
1207         struct xfs_inode        *ip,
1208         int                     whichfork)
1209 {
1210         struct xfs_iext_cursor  icur;
1211         struct xfs_bmbt_irec    rec;
1212         struct xfs_ifork        *ifp;
1213         struct xfs_da_geometry  *geo;
1214         struct xfs_buf          *bp;
1215         xfs_dablk_t             dabno;
1216         int                     error = 0;
1217
1218         if (ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
1219             ip->i_df.if_format != XFS_DINODE_FMT_BTREE)
1220                 return 0;
1221
1222         geo = tp->t_mountp->m_dir_geo;
1223         ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1224         for_each_xfs_iext(ifp, &icur, &rec) {
1225                 for (dabno = roundup(rec.br_startoff, geo->fsbcount);
1226                      dabno < rec.br_startoff + rec.br_blockcount;
1227                      dabno += geo->fsbcount) {
1228                         bp = NULL;
1229                         error = -libxfs_da_get_buf(tp, ip, dabno, &bp,
1230                                         whichfork);
1231                         if (error)
1232                                 return error;
1233                         if (!bp)
1234                                 continue;
1235                         libxfs_trans_binval(tp, bp);
1236                         libxfs_trans_brelse(tp, bp);
1237                 }
1238         }
1239
1240         return error;
1241 }
1242
1243 /*
1244  * Unexpected failure during the rebuild will leave the entries in
1245  * lost+found on the next run
1246  */
1247
1248 static void
1249 longform_dir2_rebuild(
1250         struct xfs_mount        *mp,
1251         xfs_ino_t               ino,
1252         struct xfs_inode        *ip,
1253         struct ino_tree_node    *irec,
1254         int                     ino_offset,
1255         struct dir_hash_tab     *hashtab)
1256 {
1257         int                     error;
1258         int                     nres;
1259         struct xfs_trans        *tp;
1260         xfs_fileoff_t           lastblock;
1261         struct xfs_inode        pip;
1262         struct dir_hash_ent     *p;
1263         int                     done = 0;
1264
1265         /*
1266          * trash directory completely and rebuild from scratch using the
1267          * name/inode pairs in the hash table
1268          */
1269
1270         do_warn(_("rebuilding directory inode %" PRIu64 "\n"), ino);
1271
1272         /*
1273          * first attempt to locate the parent inode, if it can't be
1274          * found, set it to the root inode and it'll be moved to the
1275          * orphanage later (the inode number here needs to be valid
1276          * for the libxfs_dir_init() call).
1277          */
1278         pip.i_ino = get_inode_parent(irec, ino_offset);
1279         if (pip.i_ino == NULLFSINO ||
1280             libxfs_dir_ino_validate(mp, pip.i_ino))
1281                 pip.i_ino = mp->m_sb.sb_rootino;
1282
1283         nres = XFS_REMOVE_SPACE_RES(mp);
1284         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, nres, 0, 0, &tp);
1285         if (error)
1286                 res_failed(error);
1287         libxfs_trans_ijoin(tp, ip, 0);
1288
1289         error = dir_binval(tp, ip, XFS_DATA_FORK);
1290         if (error)
1291                 do_error(_("error %d invalidating directory %llu blocks\n"),
1292                                 error, (unsigned long long)ip->i_ino);
1293
1294         if ((error = -libxfs_bmap_last_offset(ip, &lastblock, XFS_DATA_FORK)))
1295                 do_error(_("xfs_bmap_last_offset failed -- error - %d\n"),
1296                         error);
1297
1298         /* free all data, leaf, node and freespace blocks */
1299         while (!done) {
1300                error = -libxfs_bunmapi(tp, ip, 0, lastblock, XFS_BMAPI_METADATA,
1301                                        0, &done);
1302                if (error) {
1303                        do_warn(_("xfs_bunmapi failed -- error - %d\n"), error);
1304                        goto out_bmap_cancel;
1305                }
1306                error = -libxfs_defer_finish(&tp);
1307                if (error) {
1308                        do_warn(("defer_finish failed -- error - %d\n"), error);
1309                        goto out_bmap_cancel;
1310                }
1311                /*
1312                 * Close out trans and start the next one in the chain.
1313                 */
1314                error = -libxfs_trans_roll_inode(&tp, ip);
1315                if (error)
1316                         goto out_bmap_cancel;
1317         }
1318
1319         error = -libxfs_dir_init(tp, ip, &pip);
1320         if (error) {
1321                 do_warn(_("xfs_dir_init failed -- error - %d\n"), error);
1322                 goto out_bmap_cancel;
1323         }
1324
1325         error = -libxfs_trans_commit(tp);
1326         if (error)
1327                 do_error(
1328         _("dir init failed (%d)\n"), error);
1329
1330         if (ino == mp->m_sb.sb_rootino)
1331                 need_root_dotdot = 0;
1332
1333         /* go through the hash list and re-add the inodes */
1334
1335         for (p = hashtab->first; p; p = p->nextbyorder) {
1336
1337                 if (p->name.name[0] == '/' || (p->name.name[0] == '.' &&
1338                                 (p->name.len == 1 || (p->name.len == 2 &&
1339                                                 p->name.name[1] == '.'))))
1340                         continue;
1341
1342                 nres = XFS_CREATE_SPACE_RES(mp, p->name.len);
1343                 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_create,
1344                                             nres, 0, 0, &tp);
1345                 if (error)
1346                         res_failed(error);
1347
1348                 libxfs_trans_ijoin(tp, ip, 0);
1349
1350                 error = -libxfs_dir_createname(tp, ip, &p->name, p->inum,
1351                                                 nres);
1352                 if (error) {
1353                         do_warn(
1354 _("name create failed in ino %" PRIu64 " (%d), filesystem may be out of space\n"),
1355                                 ino, error);
1356                         goto out_bmap_cancel;
1357                 }
1358
1359                 error = -libxfs_trans_commit(tp);
1360                 if (error)
1361                         do_error(
1362 _("name create failed (%d) during rebuild\n"), error);
1363         }
1364
1365         return;
1366
1367 out_bmap_cancel:
1368         libxfs_trans_cancel(tp);
1369         return;
1370 }
1371
1372
1373 /*
1374  * Kill a block in a version 2 inode.
1375  * Makes its own transaction.
1376  */
1377 static void
1378 dir2_kill_block(
1379         xfs_mount_t     *mp,
1380         xfs_inode_t     *ip,
1381         xfs_dablk_t     da_bno,
1382         struct xfs_buf  *bp)
1383 {
1384         xfs_da_args_t   args;
1385         int             error;
1386         int             nres;
1387         xfs_trans_t     *tp;
1388
1389         nres = XFS_REMOVE_SPACE_RES(mp);
1390         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, nres, 0, 0, &tp);
1391         if (error)
1392                 res_failed(error);
1393         libxfs_trans_ijoin(tp, ip, 0);
1394         libxfs_trans_bjoin(tp, bp);
1395         libxfs_trans_bhold(tp, bp);
1396         memset(&args, 0, sizeof(args));
1397         args.dp = ip;
1398         args.trans = tp;
1399         args.whichfork = XFS_DATA_FORK;
1400         args.geo = mp->m_dir_geo;
1401         if (da_bno >= mp->m_dir_geo->leafblk && da_bno < mp->m_dir_geo->freeblk)
1402                 error = -libxfs_da_shrink_inode(&args, da_bno, bp);
1403         else
1404                 error = -libxfs_dir2_shrink_inode(&args,
1405                                 xfs_dir2_da_to_db(mp->m_dir_geo, da_bno), bp);
1406         if (error)
1407                 do_error(_("shrink_inode failed inode %" PRIu64 " block %u\n"),
1408                         ip->i_ino, da_bno);
1409         error = -libxfs_trans_commit(tp);
1410         if (error)
1411                 do_error(
1412 _("directory shrink failed (%d)\n"), error);
1413 }
1414
1415 /*
1416  * process a data block, also checks for .. entry
1417  * and corrects it to match what we think .. should be
1418  */
1419 static void
1420 longform_dir2_entry_check_data(
1421         struct xfs_mount        *mp,
1422         struct xfs_inode        *ip,
1423         int                     *num_illegal,
1424         int                     *need_dot,
1425         struct ino_tree_node    *current_irec,
1426         int                     current_ino_offset,
1427         struct xfs_buf          *bp,
1428         struct dir_hash_tab     *hashtab,
1429         freetab_t               **freetabp,
1430         xfs_dablk_t             da_bno,
1431         int                     isblock)
1432 {
1433         xfs_dir2_dataptr_t      addr;
1434         xfs_dir2_leaf_entry_t   *blp;
1435         xfs_dir2_block_tail_t   *btp;
1436         struct xfs_dir2_data_hdr *d;
1437         xfs_dir2_db_t           db;
1438         xfs_dir2_data_entry_t   *dep;
1439         xfs_dir2_data_unused_t  *dup;
1440         struct xfs_dir2_data_free *bf;
1441         char                    *endptr;
1442         int                     error;
1443         char                    fname[MAXNAMELEN + 1];
1444         freetab_t               *freetab;
1445         int                     i;
1446         int                     ino_offset;
1447         xfs_ino_t               inum;
1448         ino_tree_node_t         *irec;
1449         int                     junkit;
1450         int                     lastfree;
1451         int                     len;
1452         int                     nbad;
1453         int                     needlog;
1454         int                     needscan;
1455         xfs_ino_t               parent;
1456         char                    *ptr;
1457         xfs_trans_t             *tp;
1458         int                     wantmagic;
1459         struct xfs_da_args      da = {
1460                 .dp = ip,
1461                 .geo = mp->m_dir_geo,
1462         };
1463
1464
1465         d = bp->b_addr;
1466         ptr = (char *)d + mp->m_dir_geo->data_entry_offset;
1467         nbad = 0;
1468         needscan = needlog = 0;
1469         junkit = 0;
1470         freetab = *freetabp;
1471         if (isblock) {
1472                 btp = xfs_dir2_block_tail_p(mp->m_dir_geo, d);
1473                 blp = xfs_dir2_block_leaf_p(btp);
1474                 endptr = (char *)blp;
1475                 if (endptr > (char *)btp)
1476                         endptr = (char *)btp;
1477                 if (xfs_has_crc(mp))
1478                         wantmagic = XFS_DIR3_BLOCK_MAGIC;
1479                 else
1480                         wantmagic = XFS_DIR2_BLOCK_MAGIC;
1481         } else {
1482                 endptr = (char *)d + mp->m_dir_geo->blksize;
1483                 if (xfs_has_crc(mp))
1484                         wantmagic = XFS_DIR3_DATA_MAGIC;
1485                 else
1486                         wantmagic = XFS_DIR2_DATA_MAGIC;
1487         }
1488         db = xfs_dir2_da_to_db(mp->m_dir_geo, da_bno);
1489
1490         /* check for data block beyond expected end */
1491         if (freetab->naents <= db) {
1492                 struct freetab_ent e;
1493
1494                 *freetabp = freetab = realloc(freetab, FREETAB_SIZE(db + 1));
1495                 if (!freetab) {
1496                         do_error(_("realloc failed in %s (%zu bytes)\n"),
1497                                 __func__, FREETAB_SIZE(db + 1));
1498                 }
1499                 e.v = NULLDATAOFF;
1500                 e.s = 0;
1501                 for (i = freetab->naents; i < db; i++)
1502                         freetab->ents[i] = e;
1503                 freetab->naents = db + 1;
1504         }
1505
1506         /* check the data block */
1507         while (ptr < endptr) {
1508
1509                 /* check for freespace */
1510                 dup = (xfs_dir2_data_unused_t *)ptr;
1511                 if (XFS_DIR2_DATA_FREE_TAG == be16_to_cpu(dup->freetag)) {
1512
1513                         /* check for invalid freespace length */
1514                         if (ptr + be16_to_cpu(dup->length) > endptr ||
1515                                         be16_to_cpu(dup->length) == 0 ||
1516                                         (be16_to_cpu(dup->length) &
1517                                                 (XFS_DIR2_DATA_ALIGN - 1)))
1518                                 break;
1519
1520                         /* check for invalid tag */
1521                         if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) !=
1522                                                 (char *)dup - (char *)d)
1523                                 break;
1524
1525                         /* check for block with no data entries */
1526                         if ((ptr == (char *)d + mp->m_dir_geo->data_entry_offset) &&
1527                             (ptr + be16_to_cpu(dup->length) >= endptr)) {
1528                                 junkit = 1;
1529                                 *num_illegal += 1;
1530                                 break;
1531                         }
1532
1533                         /* continue at the end of the freespace */
1534                         ptr += be16_to_cpu(dup->length);
1535                         if (ptr >= endptr)
1536                                 break;
1537                 }
1538
1539                 /* validate data entry size */
1540                 dep = (xfs_dir2_data_entry_t *)ptr;
1541                 if (ptr + libxfs_dir2_data_entsize(mp, dep->namelen) > endptr)
1542                         break;
1543                 if (be16_to_cpu(*libxfs_dir2_data_entry_tag_p(mp, dep)) !=
1544                                                 (char *)dep - (char *)d)
1545                         break;
1546                 ptr += libxfs_dir2_data_entsize(mp, dep->namelen);
1547         }
1548
1549         /* did we find an empty or corrupt block? */
1550         if (ptr != endptr) {
1551                 if (junkit) {
1552                         do_warn(
1553         _("empty data block %u in directory inode %" PRIu64 ": "),
1554                                 da_bno, ip->i_ino);
1555                 } else {
1556                         do_warn(_
1557         ("corrupt block %u in directory inode %" PRIu64 ": "),
1558                                 da_bno, ip->i_ino);
1559                 }
1560                 if (!no_modify) {
1561                         do_warn(_("junking block\n"));
1562                         dir2_kill_block(mp, ip, da_bno, bp);
1563                 } else {
1564                         do_warn(_("would junk block\n"));
1565                 }
1566                 freetab->ents[db].v = NULLDATAOFF;
1567                 return;
1568         }
1569
1570         /* update number of data blocks processed */
1571         if (freetab->nents < db + 1)
1572                 freetab->nents = db + 1;
1573
1574         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, 0, 0, 0, &tp);
1575         if (error)
1576                 res_failed(error);
1577         da.trans = tp;
1578         libxfs_trans_ijoin(tp, ip, 0);
1579         libxfs_trans_bjoin(tp, bp);
1580         libxfs_trans_bhold(tp, bp);
1581         if (be32_to_cpu(d->magic) != wantmagic) {
1582                 do_warn(
1583         _("bad directory block magic # %#x for directory inode %" PRIu64 " block %d: "),
1584                         be32_to_cpu(d->magic), ip->i_ino, da_bno);
1585                 if (!no_modify) {
1586                         do_warn(_("fixing magic # to %#x\n"), wantmagic);
1587                         d->magic = cpu_to_be32(wantmagic);
1588                         needlog = 1;
1589                 } else
1590                         do_warn(_("would fix magic # to %#x\n"), wantmagic);
1591         }
1592         lastfree = 0;
1593         ptr = (char *)d + mp->m_dir_geo->data_entry_offset;
1594         /*
1595          * look at each entry.  reference inode pointed to by each
1596          * entry in the incore inode tree.
1597          * if not a directory, set reached flag, increment link count
1598          * if a directory and reached, mark entry as to be deleted.
1599          * if a directory, check to see if recorded parent
1600          *      matches current inode #,
1601          *      if so, then set reached flag, increment link count
1602          *              of current and child dir inodes, push the child
1603          *              directory inode onto the directory stack.
1604          *      if current inode != parent, then mark entry to be deleted.
1605          */
1606         while (ptr < endptr) {
1607                 dup = (xfs_dir2_data_unused_t *)ptr;
1608                 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
1609                         if (lastfree) {
1610                                 do_warn(
1611         _("directory inode %" PRIu64 " block %u has consecutive free entries: "),
1612                                         ip->i_ino, da_bno);
1613                                 if (!no_modify) {
1614
1615                                         do_warn(_("joining together\n"));
1616                                         len = be16_to_cpu(dup->length);
1617                                         libxfs_dir2_data_use_free(&da, bp, dup,
1618                                                 ptr - (char *)d, len, &needlog,
1619                                                 &needscan);
1620                                         libxfs_dir2_data_make_free(&da, bp,
1621                                                 ptr - (char *)d, len, &needlog,
1622                                                 &needscan);
1623                                 } else
1624                                         do_warn(_("would join together\n"));
1625                         }
1626                         ptr += be16_to_cpu(dup->length);
1627                         lastfree = 1;
1628                         continue;
1629                 }
1630                 addr = xfs_dir2_db_off_to_dataptr(mp->m_dir_geo, db,
1631                                                   ptr - (char *)d);
1632                 dep = (xfs_dir2_data_entry_t *)ptr;
1633                 ptr += libxfs_dir2_data_entsize(mp, dep->namelen);
1634                 inum = be64_to_cpu(dep->inumber);
1635                 lastfree = 0;
1636                 /*
1637                  * skip bogus entries (leading '/').  they'll be deleted
1638                  * later.  must still log it, else we leak references to
1639                  * buffers.
1640                  */
1641                 if (dep->name[0] == '/')  {
1642                         nbad++;
1643                         if (!no_modify)
1644                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1645                         continue;
1646                 }
1647
1648                 memmove(fname, dep->name, dep->namelen);
1649                 fname[dep->namelen] = '\0';
1650                 ASSERT(inum != NULLFSINO);
1651
1652                 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, inum),
1653                                         XFS_INO_TO_AGINO(mp, inum));
1654                 if (irec == NULL)  {
1655                         nbad++;
1656                         if (entry_junked(
1657         _("entry \"%s\" in directory inode %" PRIu64 " points to non-existent inode %" PRIu64 ""),
1658                                         fname, ip->i_ino, inum)) {
1659                                 dep->name[0] = '/';
1660                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1661                         }
1662                         continue;
1663                 }
1664                 ino_offset = XFS_INO_TO_AGINO(mp, inum) - irec->ino_startnum;
1665
1666                 /*
1667                  * if it's a free inode, blow out the entry.
1668                  * by now, any inode that we think is free
1669                  * really is free.
1670                  */
1671                 if (is_inode_free(irec, ino_offset))  {
1672                         nbad++;
1673                         if (entry_junked(
1674         _("entry \"%s\" in directory inode %" PRIu64 " points to free inode %" PRIu64),
1675                                         fname, ip->i_ino, inum)) {
1676                                 dep->name[0] = '/';
1677                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1678                         }
1679                         continue;
1680                 }
1681
1682                 /*
1683                  * check if this inode is lost+found dir in the root
1684                  */
1685                 if (inum == mp->m_sb.sb_rootino && strcmp(fname, ORPHANAGE) == 0) {
1686                         /*
1687                          * if it's not a directory, trash it
1688                          */
1689                         if (!inode_isadir(irec, ino_offset)) {
1690                                 nbad++;
1691                                 if (entry_junked(
1692         _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"),
1693                                                 ORPHANAGE, inum, ip->i_ino)) {
1694                                         dep->name[0] = '/';
1695                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1696                                 }
1697                                 continue;
1698                         }
1699                         /*
1700                          * if this is a dup, it will be picked up below,
1701                          * otherwise, mark it as the orphanage for later.
1702                          */
1703                         if (!orphanage_ino)
1704                                 orphanage_ino = inum;
1705                 }
1706
1707                 /*
1708                  * check for duplicate names in directory.
1709                  */
1710                 if (!dir_hash_add(mp, hashtab, addr, inum, dep->namelen,
1711                                 dep->name, libxfs_dir2_data_get_ftype(mp, dep))) {
1712                         nbad++;
1713                         if (entry_junked(
1714         _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"),
1715                                         fname, inum, ip->i_ino)) {
1716                                 dep->name[0] = '/';
1717                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1718                         }
1719                         if (inum == orphanage_ino)
1720                                 orphanage_ino = 0;
1721                         continue;
1722                 }
1723
1724                 /*
1725                  * if just scanning to rebuild a directory due to a ".."
1726                  * update, just continue
1727                  */
1728                 if (dotdot_update)
1729                         continue;
1730
1731                 /*
1732                  * skip the '..' entry since it's checked when the
1733                  * directory is reached by something else.  if it never
1734                  * gets reached, it'll be moved to the orphanage and we'll
1735                  * take care of it then. If it doesn't exist at all, the
1736                  * directory needs to be rebuilt first before being added
1737                  * to the orphanage.
1738                  */
1739                 if (dep->namelen == 2 && dep->name[0] == '.' &&
1740                                 dep->name[1] == '.') {
1741                         if (da_bno != 0) {
1742                                 /* ".." should be in the first block */
1743                                 nbad++;
1744                                 if (entry_junked(
1745         _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is not in the the first block"), fname,
1746                                                 inum, ip->i_ino)) {
1747                                         dir_hash_junkit(hashtab, addr);
1748                                         dep->name[0] = '/';
1749                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1750                                 }
1751                         }
1752                         continue;
1753                 }
1754                 ASSERT(no_modify || libxfs_verify_dir_ino(mp, inum));
1755                 /*
1756                  * special case the . entry.  we know there's only one
1757                  * '.' and only '.' points to itself because bogus entries
1758                  * got trashed in phase 3 if there were > 1.
1759                  * bump up link count for '.' but don't set reached
1760                  * until we're actually reached by another directory
1761                  * '..' is already accounted for or will be taken care
1762                  * of when directory is moved to orphanage.
1763                  */
1764                 if (ip->i_ino == inum)  {
1765                         ASSERT(no_modify ||
1766                                (dep->name[0] == '.' && dep->namelen == 1));
1767                         add_inode_ref(current_irec, current_ino_offset);
1768                         if (da_bno != 0 ||
1769                             dep != (void *)d + mp->m_dir_geo->data_entry_offset) {
1770                                 /* "." should be the first entry */
1771                                 nbad++;
1772                                 if (entry_junked(
1773         _("entry \"%s\" in dir %" PRIu64 " is not the first entry"),
1774                                                 fname, inum, ip->i_ino)) {
1775                                         dir_hash_junkit(hashtab, addr);
1776                                         dep->name[0] = '/';
1777                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1778                                 }
1779                         }
1780                         *need_dot = 0;
1781                         continue;
1782                 }
1783                 /*
1784                  * skip entries with bogus inumbers if we're in no modify mode
1785                  */
1786                 if (no_modify && !libxfs_verify_dir_ino(mp, inum))
1787                         continue;
1788
1789                 /* validate ftype field if supported */
1790                 if (xfs_has_ftype(mp)) {
1791                         uint8_t dir_ftype;
1792                         uint8_t ino_ftype;
1793
1794                         dir_ftype = libxfs_dir2_data_get_ftype(mp, dep);
1795                         ino_ftype = get_inode_ftype(irec, ino_offset);
1796
1797                         if (dir_ftype != ino_ftype) {
1798                                 if (no_modify) {
1799                                         do_warn(
1800         _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
1801                                                 dir_ftype, ino_ftype,
1802                                                 ip->i_ino, inum);
1803                                 } else {
1804                                         do_warn(
1805         _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
1806                                                 dir_ftype, ino_ftype,
1807                                                 ip->i_ino, inum);
1808                                         libxfs_dir2_data_put_ftype(mp, dep, ino_ftype);
1809                                         libxfs_dir2_data_log_entry(&da, bp, dep);
1810                                         dir_hash_update_ftype(hashtab, addr,
1811                                                               ino_ftype);
1812                                 }
1813                         }
1814                 }
1815
1816                 /*
1817                  * check easy case first, regular inode, just bump
1818                  * the link count and continue
1819                  */
1820                 if (!inode_isadir(irec, ino_offset))  {
1821                         add_inode_reached(irec, ino_offset);
1822                         continue;
1823                 }
1824                 parent = get_inode_parent(irec, ino_offset);
1825                 ASSERT(parent != 0);
1826                 junkit = 0;
1827                 /*
1828                  * bump up the link counts in parent and child
1829                  * directory but if the link doesn't agree with
1830                  * the .. in the child, blow out the entry.
1831                  * if the directory has already been reached,
1832                  * blow away the entry also.
1833                  */
1834                 if (is_inode_reached(irec, ino_offset))  {
1835                         junkit = 1;
1836                         do_warn(
1837 _("entry \"%s\" in dir %" PRIu64" points to an already connected directory inode %" PRIu64 "\n"),
1838                                 fname, ip->i_ino, inum);
1839                 } else if (parent == ip->i_ino)  {
1840                         add_inode_reached(irec, ino_offset);
1841                         add_inode_ref(current_irec, current_ino_offset);
1842                 } else if (parent == NULLFSINO) {
1843                         /* ".." was missing, but this entry refers to it,
1844                            so, set it as the parent and mark for rebuild */
1845                         do_warn(
1846         _("entry \"%s\" in dir ino %" PRIu64 " doesn't have a .. entry, will set it in ino %" PRIu64 ".\n"),
1847                                 fname, ip->i_ino, inum);
1848                         set_inode_parent(irec, ino_offset, ip->i_ino);
1849                         add_inode_reached(irec, ino_offset);
1850                         add_inode_ref(current_irec, current_ino_offset);
1851                         add_dotdot_update(XFS_INO_TO_AGNO(mp, inum), irec,
1852                                                                 ino_offset);
1853                 } else  {
1854                         junkit = 1;
1855                         do_warn(
1856 _("entry \"%s\" in dir inode %" PRIu64 " inconsistent with .. value (%" PRIu64 ") in ino %" PRIu64 "\n"),
1857                                 fname, ip->i_ino, parent, inum);
1858                 }
1859                 if (junkit)  {
1860                         if (inum == orphanage_ino)
1861                                 orphanage_ino = 0;
1862                         nbad++;
1863                         if (!no_modify)  {
1864                                 dir_hash_junkit(hashtab, addr);
1865                                 dep->name[0] = '/';
1866                                 libxfs_dir2_data_log_entry(&da, bp, dep);
1867                                 if (verbose)
1868                                         do_warn(
1869                                         _("\twill clear entry \"%s\"\n"),
1870                                                 fname);
1871                         } else  {
1872                                 do_warn(_("\twould clear entry \"%s\"\n"),
1873                                         fname);
1874                         }
1875                 }
1876         }
1877         *num_illegal += nbad;
1878         if (needscan)
1879                 libxfs_dir2_data_freescan(mp, d, &i);
1880         if (needlog)
1881                 libxfs_dir2_data_log_header(&da, bp);
1882         error = -libxfs_trans_commit(tp);
1883         if (error)
1884                 do_error(
1885 _("directory block fixing failed (%d)\n"), error);
1886
1887         /* record the largest free space in the freetab for later checking */
1888         bf = libxfs_dir2_data_bestfree_p(mp, d);
1889         freetab->ents[db].v = be16_to_cpu(bf[0].length);
1890         freetab->ents[db].s = 0;
1891 }
1892
1893 /*
1894  * Check v5 metadata: the header's @owner, @blkno and @uuid must match @ino,
1895  * the disk address of @bp and sb_meta_uuid.  Warns about the first mismatch.
1896  * Returns 0 if all three match, 1 otherwise.
1897  */
1898 static int
1899 __check_dir3_header(
1900         struct xfs_mount        *mp,
1901         struct xfs_buf          *bp,
1902         xfs_ino_t               ino,
1903         __be64                  owner,
1904         __be64                  blkno,
1905         uuid_t                  *uuid)
1906 {
1907         int                     mismatch = 1;
1908
1909         if (be64_to_cpu(owner) != ino) {
1910                 do_warn(
1911 _("expected owner inode %" PRIu64 ", got %llu, directory block %" PRIu64 "\n"),
1912                         ino, (unsigned long long)be64_to_cpu(owner),
1913                         xfs_buf_daddr(bp));
1914         } else if (be64_to_cpu(blkno) != xfs_buf_daddr(bp)) {
1915                 do_warn(
1916 _("expected block %" PRIu64 ", got %llu, directory inode %" PRIu64 "\n"),
1917                         xfs_buf_daddr(bp),
1918                         (unsigned long long)be64_to_cpu(blkno), ino);
1919         } else if (platform_uuid_compare(uuid, &mp->m_sb.sb_meta_uuid) != 0) {
1920                 do_warn(
1921 _("wrong FS UUID, directory inode %" PRIu64 " block %" PRIu64 "\n"),
1922                         ino, xfs_buf_daddr(bp));
1923         } else {
1924                 mismatch = 0;
1925         }
1926         return mismatch;
1927 }
1928
1929 /* v5 check of a da3 blkinfo header (dir leaf/node blocks); 0 if it matches */
1930 static int
1931 check_da3_header(
1932         struct xfs_mount        *mp,
1933         struct xfs_buf          *bp,
1934         xfs_ino_t               ino)
1935 {
1936         struct xfs_da3_blkinfo  *hdr = bp->b_addr;
1937
1938         return __check_dir3_header(mp, bp, ino, hdr->owner, hdr->blkno, &hdr->uuid);
1939 }
1940
1941 /* v5 check of a dir3 block header (data/block/free blocks); 0 if it matches */
1942 static int
1943 check_dir3_header(
1944         struct xfs_mount        *mp,
1945         struct xfs_buf          *bp,
1946         xfs_ino_t               ino)
1947 {
1948         struct xfs_dir3_blk_hdr *hdr = bp->b_addr;
1949
1950         return __check_dir3_header(mp, bp, ino, hdr->owner, hdr->blkno, &hdr->uuid);
1951 }
1952
1953 /*
1954  * Check the leaf block of a leaf-form directory: validate its header, match
1955  * its hash entries against @hashtab and compare its bests array with @freetab
1956  * (marking the entries compared).  Returns 0 if consistent, nonzero if not.
1957  */
1958 static int
1959 longform_dir2_check_leaf(
1960         struct xfs_mount        *mp,
1961         struct xfs_inode        *ip,
1962         struct dir_hash_tab     *hashtab,
1963         struct freetab          *freetab)
1964 {
1965         struct xfs_dir3_icleaf_hdr leafhdr;
1966         struct xfs_buf          *bp;
1967         xfs_dir2_leaf_t         *leaf;
1968         xfs_dir2_leaf_tail_t    *tail;
1969         __be16                  *bests;
1970         xfs_dablk_t             da_bno = mp->m_dir_geo->leafblk;
1971         int                     fix = 0;
1972         int                     seen;
1973         int                     tail_bad;
1974         int                     error;
1975         int                     idx;
1976         int                     rc = 1;
1977
1978         error = dir_read_buf(ip, da_bno, &bp, &xfs_dir3_leaf1_buf_ops, &fix);
1979         if (error == EFSBADCRC || error == EFSCORRUPTED || fix) {
1980                 do_warn(
1981         _("leaf block %u for directory inode %" PRIu64 " bad CRC\n"),
1982                         da_bno, ip->i_ino);
1983                 /* NOTE(review): bp not released here; confirm none is held */
1984                 return 1;
1985         } else if (error) {
1986                 do_error(
1987         _("can't read block %u for directory inode %" PRIu64 ", error %d\n"),
1988                         da_bno, ip->i_ino, error);
1989                 /* NOTREACHED */
1990         }
1991
1992         leaf = bp->b_addr;
1993         libxfs_dir2_leaf_hdr_from_disk(mp, &leafhdr, leaf);
1994         tail = xfs_dir2_leaf_tail_p(mp->m_dir_geo, leaf);
1995         bests = xfs_dir2_leaf_bests_p(tail);
1996         if ((leafhdr.magic != XFS_DIR2_LEAF1_MAGIC &&
1997              leafhdr.magic != XFS_DIR3_LEAF1_MAGIC) ||
1998             leafhdr.forw || leafhdr.back ||
1999             leafhdr.count < leafhdr.stale ||
2000             leafhdr.count > mp->m_dir_geo->leaf_max_ents ||
2001             (char *)&leafhdr.ents[leafhdr.count] > (char *)bests) {
2002                 do_warn(
2003         _("leaf block %u for directory inode %" PRIu64 " bad header\n"),
2004                         da_bno, ip->i_ino);
2005                 goto out_relse;
2006         }
2007
2008         if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) {
2009                 error = check_da3_header(mp, bp, ip->i_ino);
2010                 if (error) {
2011                         rc = error;
2012                         goto out_relse;
2013                 }
2014         }
2015
2016         seen = dir_hash_see_all(hashtab, leafhdr.ents, leafhdr.count,
2017                         leafhdr.stale);
2018         if (dir_hash_check(hashtab, ip, seen))
2019                 goto out_relse;
2020         tail_bad = freetab->nents != be32_to_cpu(tail->bestcount);
2021         for (idx = 0; !tail_bad && idx < be32_to_cpu(tail->bestcount); idx++) {
2022                 freetab->ents[idx].s = 1;
2023                 tail_bad = freetab->ents[idx].v != be16_to_cpu(bests[idx]);
2024         }
2025         if (tail_bad) {
2026                 do_warn(
2027         _("leaf block %u for directory inode %" PRIu64 " bad tail\n"),
2028                         da_bno, ip->i_ino);
2029                 goto out_relse;
2030         }
2031         rc = fix;
2032 out_relse:
2033         libxfs_buf_relse(bp);
2034         return rc;
2035 }
2036
2037 /*
2038  * Check contents of the node blocks (leaves) and free index blocks.
2039  *
2040  * Each block from leafblk up to freeblk has its header checked and, unless
2041  * it is a da node block, its hash entries matched against @hashtab.  Each
2042  * block from freeblk onwards has its header checked and its bests compared
2043  * with @freetab, marking the entries it covers as seen; afterwards no
2044  * freetab entry other than a NULLDATAOFF one may be left unseen.
2045  *
2046  * Returns nonzero if any check fails or dir_read_buf() set its fixit flag.
2047  */
2048 static int
2049 longform_dir2_check_node(
2050         struct xfs_mount        *mp,
2051         struct xfs_inode        *ip,
2052         struct dir_hash_tab     *hashtab,
2053         struct freetab          *freetab)
2054 {
2055         struct xfs_buf          *bp;
2056         xfs_dablk_t             da_bno;
2057         xfs_dir2_db_t           fdb;
2058         int                     i;
2059         xfs_fileoff_t           next_da_bno;
2060         int                     seeval = 0;
2061         int                     used;
2062         struct xfs_dir3_icleaf_hdr leafhdr;
2063         struct xfs_dir3_icfree_hdr freehdr;
2064         int                     error;
2065         int                     fixit = 0;
2066
2067         for (da_bno = mp->m_dir_geo->leafblk, next_da_bno = 0;
2068                         next_da_bno != NULLFILEOFF && da_bno < mp->m_dir_geo->freeblk;
2069                         da_bno = (xfs_dablk_t)next_da_bno) {
2070                 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2071                 if (bmap_next_offset(ip, &next_da_bno))
2072                         break;
2073
2074                 /*
2075                  * Blocks in the leaf hash tree can be either leaf or node
2076                  * blocks.  Read with the da3 node verifier, which handles
2077                  * both and calls the correct verifier; node blocks are
2078                  * skipped below based on a magic number check.
2079                  */
2080                 error = dir_read_buf(ip, da_bno, &bp, &xfs_da3_node_buf_ops,
2081                                 &fixit);
2082                 if (error) {
2083                         do_warn(
2084         _("can't read leaf block %u for directory inode %" PRIu64 ", error %d\n"),
2085                                 da_bno, ip->i_ino, error);
2086                         return 1;
2087                 }
2088                 libxfs_dir2_leaf_hdr_from_disk(mp, &leafhdr, bp->b_addr);
2089                 if (!(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
2090                       leafhdr.magic == XFS_DIR3_LEAFN_MAGIC ||
2091                       leafhdr.magic == XFS_DA_NODE_MAGIC ||
2092                       leafhdr.magic == XFS_DA3_NODE_MAGIC)) {
2093                         do_warn(
2094         _("unknown magic number %#x for block %u in directory inode %" PRIu64 "\n"),
2095                                 leafhdr.magic, da_bno, ip->i_ino);
2096                         libxfs_buf_relse(bp);
2097                         return 1;
2098                 }
2099
2100                 /* check v5 metadata */
2101                 if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC ||
2102                     leafhdr.magic == XFS_DA3_NODE_MAGIC) {
2103                         error = check_da3_header(mp, bp, ip->i_ino);
2104                         if (error) {
2105                                 libxfs_buf_relse(bp);
2106                                 return error;
2107                         }
2108                 }
2109
2110                 /* ignore nodes */
2111                 if (leafhdr.magic == XFS_DA_NODE_MAGIC ||
2112                     leafhdr.magic == XFS_DA3_NODE_MAGIC) {
2113                         libxfs_buf_relse(bp);
2114                         continue;
2115                 }
2116
2117                 /*
2118                  * If there's a validator error, we need to ensure that we got
2119                  * the right ops on the buffer for when we write it back out.
2120                  */
2121                 bp->b_ops = &xfs_dir3_leafn_buf_ops;
2122                 if (leafhdr.count > mp->m_dir_geo->leaf_max_ents ||
2123                     leafhdr.count < leafhdr.stale) {
2124                         do_warn(
2125         _("leaf block %u for directory inode %" PRIu64 " bad header\n"),
2126                                 da_bno, ip->i_ino);
2127                         libxfs_buf_relse(bp);
2128                         return 1;
2129                 }
2130                 seeval = dir_hash_see_all(hashtab, leafhdr.ents,
2131                                         leafhdr.count, leafhdr.stale);
2132                 libxfs_buf_relse(bp);
2133                 if (seeval != DIR_HASH_CK_OK)
2134                         return 1;
2135         }
2136         if (dir_hash_check(hashtab, ip, seeval))
2137                 return 1;
2138
2139         for (da_bno = mp->m_dir_geo->freeblk, next_da_bno = 0;
2140              next_da_bno != NULLFILEOFF;
2141              da_bno = (xfs_dablk_t)next_da_bno) {
2142                 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2143                 if (bmap_next_offset(ip, &next_da_bno))
2144                         break;
2145
2146                 error = dir_read_buf(ip, da_bno, &bp, &xfs_dir3_free_buf_ops,
2147                                 &fixit);
2148                 if (error) {
2149                         do_warn(
2150         _("can't read freespace block %u for directory inode %" PRIu64 ", error %d\n"),
2151                                 da_bno, ip->i_ino, error);
2152                         return 1;
2153                 }
2154                 libxfs_dir2_free_hdr_from_disk(mp, &freehdr, bp->b_addr);
2155                 fdb = xfs_dir2_da_to_db(mp->m_dir_geo, da_bno);
2156                 /* reject nvalid values that would run bests[] off the block */
2157                 if (!(freehdr.magic == XFS_DIR2_FREE_MAGIC ||
2158                       freehdr.magic == XFS_DIR3_FREE_MAGIC) ||
2159                     freehdr.firstdb !=
2160                         (fdb - xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) *
2161                         mp->m_dir_geo->free_max_bests ||
2162                     freehdr.nvalid > mp->m_dir_geo->free_max_bests ||
2163                     freehdr.nvalid < freehdr.nused) {
2164                         do_warn(
2165         _("free block %u for directory inode %" PRIu64 " bad header\n"),
2166                                 da_bno, ip->i_ino);
2167                         libxfs_buf_relse(bp);
2168                         return 1;
2169                 }
2170
2171                 if (freehdr.magic == XFS_DIR3_FREE_MAGIC) {
2172                         error = check_dir3_header(mp, bp, ip->i_ino);
2173                         if (error) {
2174                                 libxfs_buf_relse(bp);
2175                                 return error;
2176                         }
2177                 }
2178                 for (i = used = 0; i < freehdr.nvalid; i++) {
2179                         if (i + freehdr.firstdb >= freetab->nents ||
2180                                         freetab->ents[i + freehdr.firstdb].v !=
2181                                                 be16_to_cpu(freehdr.bests[i])) {
2182                                 do_warn(
2183         _("free block %u entry %i for directory ino %" PRIu64 " bad\n"),
2184                                         da_bno, i, ip->i_ino);
2185                                 libxfs_buf_relse(bp);
2186                                 return 1;
2187                         }
2188                         used += be16_to_cpu(freehdr.bests[i]) != NULLDATAOFF;
2189                         freetab->ents[i + freehdr.firstdb].s = 1;
2190                 }
2191                 if (used != freehdr.nused) {
2192                         do_warn(
2193         _("free block %u for directory inode %" PRIu64 " bad nused\n"),
2194                                 da_bno, ip->i_ino);
2195                         libxfs_buf_relse(bp);
2196                         return 1;
2197                 }
2198                 libxfs_buf_relse(bp);
2199         }
2200         for (i = 0; i < freetab->nents; i++) {
2201                 if ((freetab->ents[i].s == 0) &&
2202                     (freetab->ents[i].v != NULLDATAOFF)) {
2203                         do_warn(
2204         _("missing freetab entry %u for directory inode %" PRIu64 "\n"),
2205                                 i, ip->i_ino);
2206                         return 1;
2207                 }
2208         }
2209         return fixit;
2210 }
2211
2212 /*
2213  * If a directory is corrupt, we need to read in as many entries as possible,
2214  * destroy the entry and create a new one with recovered name/inode pairs.
2215  * (ie. get libxfs to do all the grunt work)
2216  */
2217 static void
2218 longform_dir2_entry_check(
2219         struct xfs_mount        *mp,
2220         xfs_ino_t               ino,
2221         struct xfs_inode        *ip,
2222         int                     *num_illegal,
2223         int                     *need_dot,
2224         struct ino_tree_node    *irec,
2225         int                     ino_offset,
2226         struct dir_hash_tab     *hashtab)
2227 {
2228         struct xfs_buf          *bp = NULL;
2229         xfs_dablk_t             da_bno;
2230         freetab_t               *freetab;
2231         int                     i;
2232         int                     isblock;
2233         int                     isleaf;
2234         xfs_fileoff_t           next_da_bno;
2235         int                     seeval;
2236         int                     fixit = 0;
2237         struct xfs_da_args      args;
2238
2239         *need_dot = 1;
2240         freetab = malloc(FREETAB_SIZE(ip->i_disk_size / mp->m_dir_geo->blksize));
2241         if (!freetab) {
2242                 do_error(_("malloc failed in %s (%" PRId64 " bytes)\n"),
2243                         __func__,
2244                         FREETAB_SIZE(ip->i_disk_size / mp->m_dir_geo->blksize));
2245                 exit(1);
2246         }
2247         freetab->naents = ip->i_disk_size / mp->m_dir_geo->blksize;
2248         freetab->nents = 0;
2249         for (i = 0; i < freetab->naents; i++) {
2250                 freetab->ents[i].v = NULLDATAOFF;
2251                 freetab->ents[i].s = 0;
2252         }
2253
2254         /* is this a block, leaf, or node directory? */
2255         args.dp = ip;
2256         args.geo = mp->m_dir_geo;
2257         libxfs_dir2_isblock(&args, &isblock);
2258         libxfs_dir2_isleaf(&args, &isleaf);
2259
2260         /* check directory "data" blocks (ie. name/inode pairs) */
2261         for (da_bno = 0, next_da_bno = 0;
2262              next_da_bno != NULLFILEOFF && da_bno < mp->m_dir_geo->leafblk;
2263              da_bno = (xfs_dablk_t)next_da_bno) {
2264                 const struct xfs_buf_ops *ops;
2265                 int                      error;
2266                 struct xfs_dir2_data_hdr *d;
2267
2268                 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2269                 if (bmap_next_offset(ip, &next_da_bno)) {
2270                         /*
2271                          * if this is the first block, there isn't anything we
2272                          * can recover so we just trash it.
2273                          */
2274                          if (da_bno == 0) {
2275                                 fixit++;
2276                                 goto out_fix;
2277                         }
2278                         break;
2279                 }
2280
2281                 if (isblock)
2282                         ops = &xfs_dir3_block_buf_ops;
2283                 else
2284                         ops = &xfs_dir3_data_buf_ops;
2285
2286                 error = dir_read_buf(ip, da_bno, &bp, ops, &fixit);
2287                 if (error) {
2288                         do_warn(
2289         _("can't read data block %u for directory inode %" PRIu64 " error %d\n"),
2290                                 da_bno, ino, error);
2291                         *num_illegal += 1;
2292
2293                         /*
2294                          * we try to read all "data" blocks, but if we are in
2295                          * block form and we fail, there isn't anything else to
2296                          * read, and nothing we can do but trash it.
2297                          */
2298                         if (isblock) {
2299                                 fixit++;
2300                                 goto out_fix;
2301                         }
2302                         continue;
2303                 }
2304
2305                 /* check v5 metadata */
2306                 d = bp->b_addr;
2307                 if (be32_to_cpu(d->magic) == XFS_DIR3_BLOCK_MAGIC ||
2308                     be32_to_cpu(d->magic) == XFS_DIR3_DATA_MAGIC) {
2309                         error = check_dir3_header(mp, bp, ino);
2310                         if (error) {
2311                                 fixit++;
2312                                 if (isblock)
2313                                         goto out_fix;
2314                                 continue;
2315                         }
2316                 }
2317
2318                 longform_dir2_entry_check_data(mp, ip, num_illegal, need_dot,
2319                                 irec, ino_offset, bp, hashtab,
2320                                 &freetab, da_bno, isblock);
2321                 if (isblock)
2322                         break;
2323
2324                 libxfs_buf_relse(bp);
2325         }
2326         fixit |= (*num_illegal != 0) || dir2_is_badino(ino) || *need_dot;
2327
2328         if (!dotdot_update) {
2329                 /* check btree and freespace */
2330                 if (isblock) {
2331                         struct xfs_dir2_data_hdr *block;
2332                         xfs_dir2_block_tail_t   *btp;
2333                         xfs_dir2_leaf_entry_t   *blp;
2334
2335                         block = bp->b_addr;
2336                         btp = xfs_dir2_block_tail_p(mp->m_dir_geo, block);
2337                         blp = xfs_dir2_block_leaf_p(btp);
2338                         seeval = dir_hash_see_all(hashtab, blp,
2339                                                 be32_to_cpu(btp->count),
2340                                                 be32_to_cpu(btp->stale));
2341                         if (dir_hash_check(hashtab, ip, seeval))
2342                                 fixit |= 1;
2343                 } else if (isleaf) {
2344                         fixit |= longform_dir2_check_leaf(mp, ip, hashtab,
2345                                                                 freetab);
2346                 } else {
2347                         fixit |= longform_dir2_check_node(mp, ip, hashtab,
2348                                                                 freetab);
2349                 }
2350         }
2351 out_fix:
2352         if (isblock && bp)
2353                 libxfs_buf_relse(bp);
2354
2355         if (!no_modify && (fixit || dotdot_update)) {
2356                 longform_dir2_rebuild(mp, ino, ip, irec, ino_offset, hashtab);
2357                 *num_illegal = 0;
2358                 *need_dot = 0;
2359         } else {
2360                 if (fixit || dotdot_update)
2361                         do_warn(
2362         _("would rebuild directory inode %" PRIu64 "\n"), ino);
2363         }
2364
2365         free(freetab);
2366 }
2367
2368 /*
2369  * shortform directory v2 processing routines -- entry verification and
2370  * bad entry deletion (pruning).
      *
      * shortform_dir2_junk - junk a single entry of a shortform directory.
      *
      * mp, sfp:        mount and shortform header the entry belongs to
      * sfep:           the entry to junk
      * lino:           inode number the entry refers to; when it equals
      *                 orphanage_ino the remembered orphanage inode is reset
      *                 to 0 (this happens in no_modify mode as well)
      * max_size:       in/out, size in bytes of the shortform region as tracked
      *                 by the caller; reduced by the size of the junked entry
      * index:          in/out, the caller's loop index; decremented so that it
      *                 stays in step with the decremented sfp->count
      * bytes_deleted:  in/out, running total of bytes removed from the region
      * ino_dirty:      out, set to 1 once the directory has been modified
      *
      * In no_modify mode the directory is left untouched: a warning is issued
      * and a pointer to the following entry is returned.  Otherwise everything
      * after the entry is moved down on top of it and sfep itself is returned,
      * because that address now holds whatever used to follow the junked entry.
2371  */
2372 static struct xfs_dir2_sf_entry *
2373 shortform_dir2_junk(
2374         struct xfs_mount        *mp,
2375         struct xfs_dir2_sf_hdr  *sfp,
2376         struct xfs_dir2_sf_entry *sfep,
2377         xfs_ino_t               lino,
2378         int                     *max_size,
2379         int                     *index,
2380         int                     *bytes_deleted,
2381         int                     *ino_dirty)
2382 {
2383         struct xfs_dir2_sf_entry *next_sfep;
2384         int                     next_len;
2385         int                     next_elen;
2386
             /* the junked entry can no longer serve as the orphanage */
2387         if (lino == orphanage_ino)
2388                 orphanage_ino = 0;
2389
             /*
              * next_elen is the on-disk size of the entry being junked (despite
              * the name); next_sfep is the entry that follows it.
              */
2390         next_elen = libxfs_dir2_sf_entsize(mp, sfp, sfep->namelen);
2391         next_sfep = libxfs_dir2_sf_nextentry(mp, sfp, sfep);
2392
2393         /*
2394          * if we are just checking, simply return the pointer to the next entry
2395          * here so that the checking loop can continue.
2396          */
2397         if (no_modify) {
2398                 do_warn(_("would junk entry\n"));
2399                 return next_sfep;
2400         }
2401
2402         /*
2403          * now move all the remaining entries down over the junked entry and
2404          * clear the newly unused bytes at the tail of the directory region.
              *
              * next_len is the number of bytes between next_sfep and the end of
              * the region; it must be computed from *max_size before *max_size
              * is shrunk.
2405          */
2406         next_len = *max_size - ((intptr_t)next_sfep - (intptr_t)sfp);
2407         *max_size -= next_elen;
2408         *bytes_deleted += next_elen;
2409
2410         memmove(sfep, next_sfep, next_len);
2411         memset((void *)((intptr_t)sfep + next_len), 0, next_elen);
2412         sfp->count -= 1;
2413         *ino_dirty = 1;
2414
2415         /*
2416          * WARNING:  drop the index i by one so it matches the decremented count
2417          * for accurate comparisons in the loop test
2418          */
2419         (*index)--;
2420
             /*
              * callers print their diagnostic without a trailing newline in
              * several places; terminate that line one way or the other.
              */
2421         if (verbose)
2422                 do_warn(_("junking entry\n"));
2423         else
2424                 do_warn("\n");
2425         return sfep;
2426 }
2427
     /*
      * shortform_dir2_entry_check - verify the entries of a shortform directory.
      *
      * mp:                  mount structure
      * ino:                 inode number of the directory being checked
      * ip:                  in-core inode; its data fork holds the shortform data
      * ino_dirty:           out, cleared on entry and set to 1 whenever the
      *                      in-core inode is changed and needs logging
      * current_irec,
      * current_ino_offset:  inode record and offset within it of this directory;
      *                      used for link accounting and parent lookup
      * hashtab:             name hash used to detect duplicate entry names
      *
      * When dotdot_update is set only the parent pointer is rewritten (or
      * reported in no_modify mode) and the function returns.
      *
      * Otherwise every entry is examined.  An entry is junked through
      * shortform_dir2_junk() when it references an inode with no inode record,
      * references a free inode, is the root's ORPHANAGE entry but not a
      * directory, duplicates an earlier name, points at a directory that has
      * already been reached, or points at a directory whose recorded parent is
      * some other inode.  Surviving entries get their reference counts bumped
      * and, on filesystems with ftype, their ftype field checked against the
      * inode.  Finally i8count and the directory size are brought back in
      * sync with what is left.
      */
2428 static void
2429 shortform_dir2_entry_check(
2430         struct xfs_mount        *mp,
2431         xfs_ino_t               ino,
2432         struct xfs_inode        *ip,
2433         int                     *ino_dirty,
2434         struct ino_tree_node    *current_irec,
2435         int                     current_ino_offset,
2436         struct dir_hash_tab     *hashtab)
2437 {
2438         xfs_ino_t               lino;
2439         xfs_ino_t               parent;
2440         struct xfs_dir2_sf_hdr  *sfp;
2441         struct xfs_dir2_sf_entry *sfep;
2442         struct xfs_dir2_sf_entry *next_sfep;
2443         struct xfs_ifork        *ifp;
2444         struct ino_tree_node    *irec;
2445         int                     max_size;
2446         int                     ino_offset;
2447         int                     i;
2448         int                     bad_sfnamelen;
2449         int                     namelen;
2450         int                     bytes_deleted;
2451         char                    fname[MAXNAMELEN + 1];
2452         int                     i8;
2453
2454         ifp = &ip->i_df;
2455         sfp = (struct xfs_dir2_sf_hdr *) ifp->if_u1.if_data;
2456         *ino_dirty = 0;
2457         bytes_deleted = 0;
2458
             /*
              * max_size bounds the walk below; it shrinks as entries are
              * junked (shortform_dir2_junk() updates it through the pointer).
              */
2459         max_size = ifp->if_bytes;
2460         ASSERT(ip->i_disk_size <= ifp->if_bytes);
2461
2462         /*
2463          * if just rebuild a directory due to a "..", update and return
2464          */
2465         if (dotdot_update) {
2466                 parent = get_inode_parent(current_irec, current_ino_offset);
2467                 if (no_modify) {
2468                         do_warn(
2469         _("would set .. in sf dir inode %" PRIu64 " to %" PRIu64 "\n"),
2470                                 ino, parent);
2471                 } else {
2472                         do_warn(
2473         _("setting .. in sf dir inode %" PRIu64 " to %" PRIu64 "\n"),
2474                                 ino, parent);
2475                         libxfs_dir2_sf_put_parent_ino(sfp, parent);
2476                         *ino_dirty = 1;
2477                 }
2478                 return;
2479         }
2480
2481         /*
2482          * no '.' entry in shortform dirs, just bump up ref count by 1
2483          * '..' was already (or will be) accounted for and checked when
2484          * the directory is reached or will be taken care of when the
2485          * directory is moved to orphanage.
2486          */
2487         add_inode_ref(current_irec, current_ino_offset);
2488
2489         /*
2490          * Initialise i8 counter -- the parent inode number counts as well.
2491          */
2492         i8 = libxfs_dir2_sf_get_parent_ino(sfp) > XFS_DIR2_MAX_SHORT_INUM;
2493
2494         /*
2495          * now run through entries, stop at first bad entry, don't need
2496          * to skip over '..' since that's encoded in its own field and
2497          * no need to worry about '.' since it doesn't exist.
              *
              * Every path through the loop body must leave next_sfep pointing
              * at the entry to look at next: the junk helper returns it, the
              * bogus-inode skip computes it, and the bottom of the loop
              * advances it for entries that are kept.
2498          */
2499         sfep = next_sfep = xfs_dir2_sf_firstentry(sfp);
2500
2501         for (i = 0; i < sfp->count && max_size >
2502                                         (intptr_t)next_sfep - (intptr_t)sfp;
2503                         sfep = next_sfep, i++)  {
2504                 bad_sfnamelen = 0;
2505
2506                 lino = libxfs_dir2_sf_get_ino(mp, sfp, sfep);
2507
2508                 namelen = sfep->namelen;
2509
2510                 ASSERT(no_modify || namelen > 0);
2511
2512                 if (no_modify && namelen == 0)  {
2513                         /*
2514                          * if we're really lucky, this is
2515                          * the last entry in which case we
2516                          * can use the dir size to set the
2517                          * namelen value.  otherwise, forget
2518                          * it because we're not going to be
2519                          * able to find the next entry.
2520                          */
2521                         bad_sfnamelen = 1;
2522
2523                         if (i == sfp->count - 1)  {
2524                                 namelen = ip->i_disk_size -
2525                                         ((intptr_t) &sfep->name[0] -
2526                                          (intptr_t) sfp);
2527                         } else  {
2528                                 /*
2529                                  * don't process the rest of the directory,
2530                                  * break out of processing loop
2531                                  */
2532                                 break;
2533                         }
2534                 } else if (no_modify && (intptr_t) sfep - (intptr_t) sfp +
2535                                 + libxfs_dir2_sf_entsize(mp, sfp, sfep->namelen)
2536                                 > ip->i_disk_size)  {
                             /*
                              * the entry as sized by its namelen would run past
                              * the recorded directory size; handled the same way
                              * as a zero namelen above.
                              */
2537                         bad_sfnamelen = 1;
2538
2539                         if (i == sfp->count - 1)  {
2540                                 namelen = ip->i_disk_size -
2541                                         ((intptr_t) &sfep->name[0] -
2542                                          (intptr_t) sfp);
2543                         } else  {
2544                                 /*
2545                                  * don't process the rest of the directory,
2546                                  * break out of processing loop
2547                                  */
2548                                 break;
2549                         }
2550                 }
2551
                     /*
                      * NUL-terminated copy of the name for the messages and the
                      * ORPHANAGE comparison below.  Note that this uses the
                      * on-disk sfep->namelen, not the recomputed namelen.
                      */
2552                 memmove(fname, sfep->name, sfep->namelen);
2553                 fname[sfep->namelen] = '\0';
2554
2555                 ASSERT(no_modify || (lino != NULLFSINO && lino != 0));
2556                 ASSERT(no_modify || libxfs_verify_dir_ino(mp, lino));
2557
2558                 /*
2559                  * Also skip entries with bogus inode numbers if we're
2560                  * in no modify mode.
2561                  */
2562
2563                 if (no_modify && !libxfs_verify_dir_ino(mp, lino))  {
2564                         next_sfep = libxfs_dir2_sf_nextentry(mp, sfp, sfep);
2565                         continue;
2566                 }
2567
2568                 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, lino),
2569                                         XFS_INO_TO_AGINO(mp, lino));
2570
2571                 if (irec == NULL)  {
2572                         do_warn(
2573         _("entry \"%s\" in shortform directory %" PRIu64 " references non-existent inode %" PRIu64 "\n"),
2574                                 fname, ino, lino);
2575                         next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
2576                                                 &max_size, &i, &bytes_deleted,
2577                                                 ino_dirty);
2578                         continue;
2579                 }
2580
                     /* position of lino within the inode record just found */
2581                 ino_offset = XFS_INO_TO_AGINO(mp, lino) - irec->ino_startnum;
2582
2583                 /*
2584                  * if it's a free inode, blow out the entry.
2585                  * by now, any inode that we think is free
2586                  * really is free.
2587                  */
2588                 if (is_inode_free(irec, ino_offset))  {
2589                         do_warn(
2590         _("entry \"%s\" in shortform directory inode %" PRIu64 " points to free inode %" PRIu64 "\n"),
2591                                 fname, ino, lino);
2592                         next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
2593                                                 &max_size, &i, &bytes_deleted,
2594                                                 ino_dirty);
2595                         continue;
2596                 }
2597                 /*
2598                  * check if this inode is lost+found dir in the root
2599                  */
2600                 if (ino == mp->m_sb.sb_rootino && strcmp(fname, ORPHANAGE) == 0) {
2601                         /*
2602                          * if it's not a directory, trash it
2603                          */
2604                         if (!inode_isadir(irec, ino_offset)) {
2605                                 do_warn(
2606         _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"),
2607                                         ORPHANAGE, lino, ino);
2608                                 next_sfep = shortform_dir2_junk(mp, sfp, sfep,
2609                                                 lino, &max_size, &i,
2610                                                 &bytes_deleted, ino_dirty);
2611                                 continue;
2612                         }
2613                         /*
2614                          * if this is a dup, it will be picked up below,
2615                          * otherwise, mark it as the orphanage for later.
2616                          */
2617                         if (!orphanage_ino)
2618                                 orphanage_ino = lino;
2619                 }
2620                 /*
2621                  * check for duplicate names in directory.
                      * the entry's offset from the first entry serves as its
                      * address in the hash table.
2622                  */
2623                 if (!dir_hash_add(mp, hashtab, (xfs_dir2_dataptr_t)
2624                                 (sfep - xfs_dir2_sf_firstentry(sfp)),
2625                                 lino, sfep->namelen, sfep->name,
2626                                 libxfs_dir2_sf_get_ftype(mp, sfep))) {
2627                         do_warn(
2628 _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"),
2629                                 fname, lino, ino);
2630                         next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
2631                                                 &max_size, &i, &bytes_deleted,
2632                                                 ino_dirty);
2633                         continue;
2634                 }
2635
2636                 if (!inode_isadir(irec, ino_offset))  {
2637                         /*
2638                          * check easy case first, regular inode, just bump
2639                          * the link count
2640                          */
2641                         add_inode_reached(irec, ino_offset);
2642                 } else  {
2643                         parent = get_inode_parent(irec, ino_offset);
2644
2645                         /*
2646                          * bump up the link counts in parent and child.
2647                          * directory but if the link doesn't agree with
2648                          * the .. in the child, blow out the entry
2649                          */
2650                         if (is_inode_reached(irec, ino_offset))  {
2651                                 do_warn(
2652         _("entry \"%s\" in directory inode %" PRIu64
2653           " references already connected inode %" PRIu64 ".\n"),
2654                                         fname, ino, lino);
2655                                 next_sfep = shortform_dir2_junk(mp, sfp, sfep,
2656                                                 lino, &max_size, &i,
2657                                                 &bytes_deleted, ino_dirty);
2658                                 continue;
2659                         } else if (parent == ino)  {
2660                                 add_inode_reached(irec, ino_offset);
2661                                 add_inode_ref(current_irec, current_ino_offset);
2662                         } else if (parent == NULLFSINO) {
2663                                 /* ".." was missing, but this entry refers to it,
2664                                 so, set it as the parent and mark for rebuild */
2665                                 do_warn(
2666         _("entry \"%s\" in dir ino %" PRIu64 " doesn't have a .. entry, will set it in ino %" PRIu64 ".\n"),
2667                                         fname, ino, lino);
2668                                 set_inode_parent(irec, ino_offset, ino);
2669                                 add_inode_reached(irec, ino_offset);
2670                                 add_inode_ref(current_irec, current_ino_offset);
2671                                 add_dotdot_update(XFS_INO_TO_AGNO(mp, lino),
2672                                                         irec, ino_offset);
2673                         } else  {
2674                                 do_warn(
2675         _("entry \"%s\" in directory inode %" PRIu64
2676           " not consistent with .. value (%" PRIu64
2677           ") in inode %" PRIu64 ",\n"),
2678                                         fname, ino, parent, lino);
2679                                 next_sfep = shortform_dir2_junk(mp, sfp, sfep,
2680                                                 lino, &max_size, &i,
2681                                                 &bytes_deleted, ino_dirty);
2682                                 continue;
2683                         }
2684                 }
2685
2686                 /* validate ftype field if supported */
2687                 if (xfs_has_ftype(mp)) {
2688                         uint8_t dir_ftype;
2689                         uint8_t ino_ftype;
2690
2691                         dir_ftype = libxfs_dir2_sf_get_ftype(mp, sfep);
2692                         ino_ftype = get_inode_ftype(irec, ino_offset);
2693
2694                         if (dir_ftype != ino_ftype) {
2695                                 if (no_modify) {
2696                                         do_warn(
2697         _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
2698                                                 dir_ftype, ino_ftype,
2699                                                 ino, lino);
2700                                 } else {
2701                                         do_warn(
2702         _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
2703                                                 dir_ftype, ino_ftype,
2704                                                 ino, lino);
                                             /*
                                              * the inode's type wins; fix both the
                                              * entry and its copy in the hash table
                                              */
2705                                         libxfs_dir2_sf_put_ftype(mp, sfep,
2706                                                                 ino_ftype);
2707                                         dir_hash_update_ftype(hashtab,
2708                         (xfs_dir2_dataptr_t)(sfep - xfs_dir2_sf_firstentry(sfp)),
2709                                                               ino_ftype);
2710                                         *ino_dirty = 1;
2711                                 }
2712                         }
2713                 }
2714
                     /* only entries that were kept count towards i8count */
2715                 if (lino > XFS_DIR2_MAX_SHORT_INUM)
2716                         i8++;
2717
2718                 /*
2719                  * go onto next entry - we have to take entries with bad namelen
2720                  * into account in no modify mode since we calculate size based
2721                  * on next_sfep.
2722                  */
2723                 ASSERT(no_modify || bad_sfnamelen == 0);
2724                 next_sfep = (struct xfs_dir2_sf_entry *)((intptr_t)sfep +
2725                               (bad_sfnamelen
2726                                 ? libxfs_dir2_sf_entsize(mp, sfp, namelen)
2727                                 : libxfs_dir2_sf_entsize(mp, sfp, sfep->namelen)));
2728         }
2729
             /*
              * i8 now holds the number of inode numbers (parent included) above
              * XFS_DIR2_MAX_SHORT_INUM among the entries that were kept.
              */
2730         if (sfp->i8count != i8) {
2731                 if (no_modify) {
2732                         do_warn(_("would fix i8count in inode %" PRIu64 "\n"),
2733                                 ino);
2734                 } else {
2735                         if (i8 == 0) {
2736                                 struct xfs_dir2_sf_entry *tmp_sfep;
2737
                                     /*
                                      * process_sf_dir2_fixi8() may move the end of
                                      * the entries (presumably by repacking to the
                                      * short inode number form -- TODO confirm);
                                      * whatever distance the end pointer moved is
                                      * added to bytes_deleted.
                                      */
2738                                 tmp_sfep = next_sfep;
2739                                 process_sf_dir2_fixi8(mp, sfp, &tmp_sfep);
2740                                 bytes_deleted +=
2741                                         (intptr_t)next_sfep -
2742                                         (intptr_t)tmp_sfep;
2743                                 next_sfep = tmp_sfep;
2744                         } else
2745                                 sfp->i8count = i8;
2746                         *ino_dirty = 1;
2747                         do_warn(_("fixing i8count in inode %" PRIu64 "\n"),
2748                                 ino);
2749                 }
2750         }
2751
2752         /*
2753          * sync up sizes if required
2754          */
2755         if (*ino_dirty && bytes_deleted > 0)  {
2756                 ASSERT(!no_modify);
2757                 libxfs_idata_realloc(ip, -bytes_deleted, XFS_DATA_FORK);
2758                 ip->i_disk_size -= bytes_deleted;
2759         }
2760
             /*
              * if the recorded size still disagrees with the fork size, trust
              * the end of the entries that were walked (next_sfep).
              */
2761         if (ip->i_disk_size != ip->i_df.if_bytes)  {
2762                 ASSERT(ip->i_df.if_bytes == (xfs_fsize_t)
2763                                 ((intptr_t) next_sfep - (intptr_t) sfp));
2764                 ip->i_disk_size = (xfs_fsize_t)
2765                                 ((intptr_t) next_sfep - (intptr_t) sfp);
2766                 do_warn(
2767         _("setting size to %" PRId64 " bytes to reflect junked entries\n"),
2768                         ip->i_disk_size);
2769                 *ino_dirty = 1;
2770         }
2771 }
2772
2773 /*
2774  * processes all reachable inodes in directories
      *
      * process_dir_inode - check (and, unless no_modify, repair) one directory.
      *
      * mp:          mount structure
      * agno:        allocation group the directory inode lives in
      * irec:        inode record (chunk) containing the directory inode
      * ino_offset:  position of the directory inode within irec
      *
      * The directory inode is opened, marked as reference-checked and handed to
      * the longform or shortform entry checker depending on its data fork
      * format.  Afterwards a missing ".." of the root directory and a missing
      * "." of a longform directory are recreated (or only reported when
      * no_modify is set).
2775  */
2776 static void
2777 process_dir_inode(
2778         struct xfs_mount        *mp,
2779         xfs_agnumber_t          agno,
2780         struct ino_tree_node    *irec,
2781         int                     ino_offset)
2782 {
2783         xfs_ino_t               ino;
2784         struct xfs_inode        *ip;
2785         struct xfs_trans        *tp;
2786         struct dir_hash_tab     *hashtab;
2787         int                     need_dot;
2788         int                     dirty, num_illegal, error, nres;
2789
2790         ino = XFS_AGINO_TO_INO(mp, agno, irec->ino_startnum + ino_offset);
2791
2792         /*
2793          * open up directory inode and check all entries;
2794          * illegal entries are dealt with by the format
2795          * specific entry checkers called below.
2796          */
2797
2798         ASSERT(!is_inode_refchecked(irec, ino_offset) || dotdot_update);
2799
2800         error = -libxfs_iget(mp, NULL, ino, 0, &ip);
2801         if (error) {
2802                 if (!no_modify)
2803                         do_error(
2804         _("couldn't map inode %" PRIu64 ", err = %d\n"),
2805                                 ino, error);
2806                 else  {
2807                         do_warn(
2808         _("couldn't map inode %" PRIu64 ", err = %d\n"),
2809                                 ino, error);
2810                         /*
2811                          * see below for what we're doing if this
2812                          * is root.  Why do we need to do this here?
2813                          * to ensure that the root doesn't show up
2814                          * as being disconnected in the no_modify case.
                              *
                              * irec describes a whole chunk of inodes, so the
                              * state must be recorded at ino_offset -- the slot
                              * ino was computed from above -- not at slot 0.
2815                          */
2816                         if (mp->m_sb.sb_rootino == ino)  {
2817                                 add_inode_reached(irec, ino_offset);
2818                                 add_inode_ref(irec, ino_offset);
2819                         }
2820                 }
2821
                     /* mark this directory, not the first inode of the chunk */
2822                 add_inode_refchecked(irec, ino_offset);
2823                 return;
2824         }
2825
2826         need_dot = dirty = num_illegal = 0;
2827
2828         if (mp->m_sb.sb_rootino == ino)  {
2829                 /*
2830                  * mark root inode reached and bump up
2831                  * link count for root inode to account
2832                  * for '..' entry since the root inode is
2833                  * never reached by a parent.  we know
2834                  * that root's '..' is always good --
2835                  * guaranteed by phase 3 and/or below.
2836                  */
2837                 add_inode_reached(irec, ino_offset);
2838         }
2839
2840         add_inode_refchecked(irec, ino_offset);
2841
             /* name hash shared with the entry checkers; released below */
2842         hashtab = dir_hash_init(ip->i_disk_size);
2843
2844         /*
2845          * look for bogus entries
2846          */
2847         switch (ip->i_df.if_format)  {
2848                 case XFS_DINODE_FMT_EXTENTS:
2849                 case XFS_DINODE_FMT_BTREE:
2850                         /*
2851                          * also check for missing '.' in longform dirs.
2852                          * missing .. entries are added if required when
2853                          * the directory is connected to lost+found. but
2854                          * we need to create '.' entries here.
2855                          */
2856                         longform_dir2_entry_check(mp, ino, ip,
2857                                                 &num_illegal, &need_dot,
2858                                                 irec, ino_offset,
2859                                                 hashtab);
2860                         break;
2861
2862                 case XFS_DINODE_FMT_LOCAL:
2863                         /*
2864                          * using the remove reservation is overkill
2865                          * since at most we'll only need to log the
2866                          * inode but it's easier than wedging a
2867                          * new define in ourselves.
2868                          */
2869                         nres = no_modify ? 0 : XFS_REMOVE_SPACE_RES(mp);
2870                         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove,
2871                                                     nres, 0, 0, &tp);
2872                         if (error)
2873                                 res_failed(error);
2874
2875                         libxfs_trans_ijoin(tp, ip, 0);
2876
2877                         shortform_dir2_entry_check(mp, ino, ip, &dirty,
2878                                                 irec, ino_offset,
2879                                                 hashtab);
2880
                             /* commit only if the checker changed the inode */
2881                         ASSERT(dirty == 0 || (dirty && !no_modify));
2882                         if (dirty)  {
2883                                 libxfs_trans_log_inode(tp, ip,
2884                                         XFS_ILOG_CORE | XFS_ILOG_DDATA);
2885                                 error = -libxfs_trans_commit(tp);
2886                                 if (error)
2887                                         do_error(
2888 _("error %d fixing shortform directory %llu\n"),
2889                                                 error,
2890                                                 (unsigned long long)ip->i_ino);
2891                         } else  {
2892                                 libxfs_trans_cancel(tp);
2893                         }
2894                         break;
2895
2896                 default:
2897                         break;
2898         }
2899         dir_hash_done(hashtab);
2900
2901         /*
2902          * if we have to create a .. for /, do it now *before*
2903          * we delete the bogus entries, otherwise the directory
2904          * could transform into a shortform dir which would
2905          * probably cause the simulation to choke.  Even
2906          * if the illegal entries get shifted around, it's ok
2907          * because the entries are structurally intact and in
2908          * in hash-value order so the simulation won't get confused
2909          * if it has to move them around.
2910          */
2911         if (!no_modify && need_root_dotdot && ino == mp->m_sb.sb_rootino)  {
2912                 ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_LOCAL);
2913
2914                 do_warn(_("recreating root directory .. entry\n"));
2915
2916                 nres = XFS_MKDIR_SPACE_RES(mp, 2);
2917                 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir,
2918                                             nres, 0, 0, &tp);
2919                 if (error)
2920                         res_failed(error);
2921
2922                 libxfs_trans_ijoin(tp, ip, 0);
2923
                     /* the root directory is its own parent */
2924                 error = -libxfs_dir_createname(tp, ip, &xfs_name_dotdot,
2925                                         ip->i_ino, nres);
2926                 if (error)
2927                         do_error(
2928         _("can't make \"..\" entry in root inode %" PRIu64 ", createname error %d\n"), ino, error);
2929
2930                 libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2931                 error = -libxfs_trans_commit(tp);
2932                 if (error)
2933                         do_error(
2934         _("root inode \"..\" entry recreation failed (%d)\n"), error);
2935
2936                 need_root_dotdot = 0;
2937         } else if (need_root_dotdot && ino == mp->m_sb.sb_rootino)  {
2938                 do_warn(_("would recreate root directory .. entry\n"));
2939         }
2940
2941         /*
2942          * if we need to create the '.' entry, do so only if
2943          * the directory is a longform dir.  if it's been
2944          * turned into a shortform dir, then the inode is ok
2945          * since shortform dirs have no '.' entry and the inode
2946          * has already been committed by prune_lf_dir_entry().
2947          */
2948         if (need_dot)  {
2949                 /*
2950                  * bump up our link count but don't
2951                  * bump up the inode link count.  chances
2952                  * are good that even though we lost '.'
2953                  * the inode link counts reflect '.' so
2954                  * leave the inode link count alone and if
2955                  * it turns out to be wrong, we'll catch
2956                  * that in phase 7.
2957                  */
2958                 add_inode_ref(irec, ino_offset);
2959
2960                 if (no_modify)  {
2961                         do_warn(
2962         _("would create missing \".\" entry in dir ino %" PRIu64 "\n"),
2963                                 ino);
2964                 } else if (ip->i_df.if_format != XFS_DINODE_FMT_LOCAL)  {
2965                         /*
2966                          * need to create . entry in longform dir.
2967                          */
2968                         do_warn(
2969         _("creating missing \".\" entry in dir ino %" PRIu64 "\n"), ino);
2970
2971                         nres = XFS_MKDIR_SPACE_RES(mp, 1);
2972                         error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir,
2973                                                     nres, 0, 0, &tp);
2974                         if (error)
2975                                 res_failed(error);
2976
2977                         libxfs_trans_ijoin(tp, ip, 0);
2978
2979                         error = -libxfs_dir_createname(tp, ip, &xfs_name_dot,
2980                                         ip->i_ino, nres);
2981                         if (error)
2982                                 do_error(
2983         _("can't make \".\" entry in dir ino %" PRIu64 ", createname error %d\n"),
2984                                         ino, error);
2985
2986                         libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2987                         error = -libxfs_trans_commit(tp);
                             /*
                              * NOTE(review): the message below says "root inode"
                              * although this path runs for any longform
                              * directory; the text is left untouched so the
                              * translated message catalogue keeps matching.
                              */
2988                         if (error)
2989                                 do_error(
2990         _("root inode \".\" entry recreation failed (%d)\n"), error);
2991                 }
2992         }
2993         libxfs_irele(ip);
2994 }
2995
2996 /*
2997  * mark realtime bitmap and summary inodes as reached.
2998  * quota inode will be marked here as well
2999  */
3000 static void
3001 mark_standalone_inodes(xfs_mount_t *mp)
3002 {
3003         ino_tree_node_t         *irec;
3004         int                     offset;
3005
3006         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rbmino),
3007                         XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rbmino));
3008
3009         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rbmino) -
3010                         irec->ino_startnum;
3011
3012         add_inode_reached(irec, offset);
3013
3014         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rsumino),
3015                         XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rsumino));
3016
3017         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rsumino) -
3018                         irec->ino_startnum;
3019
3020         add_inode_reached(irec, offset);
3021
3022         if (fs_quotas)  {
3023                 if (mp->m_sb.sb_uquotino
3024                                 && mp->m_sb.sb_uquotino != NULLFSINO)  {
3025                         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
3026                                                 mp->m_sb.sb_uquotino),
3027                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino));
3028                         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino)
3029                                         - irec->ino_startnum;
3030                         add_inode_reached(irec, offset);
3031                 }
3032                 if (mp->m_sb.sb_gquotino
3033                                 && mp->m_sb.sb_gquotino != NULLFSINO)  {
3034                         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
3035                                                 mp->m_sb.sb_gquotino),
3036                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_gquotino));
3037                         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_gquotino)
3038                                         - irec->ino_startnum;
3039                         add_inode_reached(irec, offset);
3040                 }
3041                 if (mp->m_sb.sb_pquotino
3042                                 && mp->m_sb.sb_pquotino != NULLFSINO)  {
3043                         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
3044                                                 mp->m_sb.sb_pquotino),
3045                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino));
3046                         offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino)
3047                                         - irec->ino_startnum;
3048                         add_inode_reached(irec, offset);
3049                 }
3050         }
3051 }
3052
3053 static void
3054 check_for_orphaned_inodes(
3055         xfs_mount_t             *mp,
3056         xfs_agnumber_t          agno,
3057         ino_tree_node_t         *irec)
3058 {
3059         int                     i;
3060         xfs_ino_t               ino;
3061
3062         for (i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
3063                 ASSERT(is_inode_confirmed(irec, i));
3064                 if (is_inode_free(irec, i))
3065                         continue;
3066
3067                 if (is_inode_reached(irec, i))
3068                         continue;
3069
3070                 ASSERT(inode_isadir(irec, i) ||
3071                         num_inode_references(irec, i) == 0);
3072
3073                 ino = XFS_AGINO_TO_INO(mp, agno, i + irec->ino_startnum);
3074                 if (inode_isadir(irec, i))
3075                         do_warn(_("disconnected dir inode %" PRIu64 ", "), ino);
3076                 else
3077                         do_warn(_("disconnected inode %" PRIu64 ", "), ino);
3078                 if (!no_modify)  {
3079                         if (!orphanage_ino)
3080                                 orphanage_ino = mk_orphanage(mp);
3081                         do_warn(_("moving to %s\n"), ORPHANAGE);
3082                         mv_orphanage(mp, ino, inode_isadir(irec, i));
3083                 } else  {
3084                         do_warn(_("would move to %s\n"), ORPHANAGE);
3085                 }
3086                 /*
3087                  * for read-only case, even though the inode isn't
3088                  * really reachable, set the flag (and bump our link
3089                  * count) anyway to fool phase 7
3090                  */
3091                 add_inode_reached(irec, i);
3092         }
3093 }
3094
3095 static void
3096 do_dir_inode(
3097         struct workqueue        *wq,
3098         xfs_agnumber_t          agno,
3099         void                    *arg)
3100 {
3101         struct ino_tree_node    *irec = arg;
3102         int                     i;
3103
3104         for (i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
3105                 if (inode_isadir(irec, i))
3106                         process_dir_inode(wq->wq_ctx, agno, irec, i);
3107         }
3108 }
3109
3110 static void
3111 traverse_function(
3112         struct workqueue        *wq,
3113         xfs_agnumber_t          agno,
3114         void                    *arg)
3115 {
3116         struct ino_tree_node    *irec;
3117         prefetch_args_t         *pf_args = arg;
3118         struct workqueue        lwq;
3119         struct xfs_mount        *mp = wq->wq_ctx;
3120
3121         wait_for_inode_prefetch(pf_args);
3122
3123         if (verbose)
3124                 do_log(_("        - agno = %d\n"), agno);
3125
3126         /*
3127          * The more AGs we have in flight at once, the fewer processing threads
3128          * per AG. This means we don't overwhelm the machine with hundreds of
3129          * threads when we start acting on lots of AGs at once. We just want
3130          * enough that we can keep multiple CPUs busy across multiple AGs.
3131          */
3132         workqueue_create_bound(&lwq, mp, ag_stride, 1000);
3133
3134         for (irec = findfirst_inode_rec(agno); irec; irec = next_ino_rec(irec)) {
3135                 if (irec->ino_isa_dir == 0)
3136                         continue;
3137
3138                 if (pf_args) {
3139                         sem_post(&pf_args->ra_count);
3140 #ifdef XR_PF_TRACE
3141                         {
3142                         int     i;
3143                         sem_getvalue(&pf_args->ra_count, &i);
3144                         pftrace(
3145                 "processing inode chunk %p in AG %d (sem count = %d)",
3146                                 irec, agno, i);
3147                         }
3148 #endif
3149                 }
3150
3151                 queue_work(&lwq, do_dir_inode, agno, irec);
3152         }
3153         destroy_work_queue(&lwq);
3154         cleanup_inode_prefetch(pf_args);
3155 }
3156
3157 static void
3158 update_missing_dotdot_entries(
3159         xfs_mount_t             *mp)
3160 {
3161         dotdot_update_t         *dir;
3162
3163         /*
3164          * these entries parents were updated, rebuild them again
3165          * set dotdot_update flag so processing routines do not count links
3166          */
3167         dotdot_update = 1;
3168         while (!list_empty(&dotdot_update_list)) {
3169                 dir = list_entry(dotdot_update_list.prev, struct dotdot_update,
3170                                  list);
3171                 list_del(&dir->list);
3172                 process_dir_inode(mp, dir->agno, dir->irec, dir->ino_offset);
3173                 free(dir);
3174         }
3175 }
3176
3177 static void
3178 traverse_ags(
3179         struct xfs_mount        *mp)
3180 {
3181         do_inode_prefetch(mp, ag_stride, traverse_function, false, true);
3182 }
3183
3184 void
3185 phase6(xfs_mount_t *mp)
3186 {
3187         ino_tree_node_t         *irec;
3188         int                     i;
3189
3190         memset(&zerocr, 0, sizeof(struct cred));
3191         memset(&zerofsx, 0, sizeof(struct fsxattr));
3192         orphanage_ino = 0;
3193
3194         do_log(_("Phase 6 - check inode connectivity...\n"));
3195
3196         incore_ext_teardown(mp);
3197
3198         add_ino_ex_data(mp);
3199
3200         /*
3201          * verify existence of root directory - if we have to
3202          * make one, it's ok for the incore data structs not to
3203          * know about it since everything about it (and the other
3204          * inodes in its chunk if a new chunk was created) are ok
3205          */
3206         if (need_root_inode)  {
3207                 if (!no_modify)  {
3208                         do_warn(_("reinitializing root directory\n"));
3209                         mk_root_dir(mp);
3210                         need_root_inode = 0;
3211                         need_root_dotdot = 0;
3212                 } else  {
3213                         do_warn(_("would reinitialize root directory\n"));
3214                 }
3215         }
3216
3217         if (need_rbmino)  {
3218                 if (!no_modify)  {
3219                         do_warn(_("reinitializing realtime bitmap inode\n"));
3220                         mk_rbmino(mp);
3221                         need_rbmino = 0;
3222                 } else  {
3223                         do_warn(_("would reinitialize realtime bitmap inode\n"));
3224                 }
3225         }
3226
3227         if (need_rsumino)  {
3228                 if (!no_modify)  {
3229                         do_warn(_("reinitializing realtime summary inode\n"));
3230                         mk_rsumino(mp);
3231                         need_rsumino = 0;
3232                 } else  {
3233                         do_warn(_("would reinitialize realtime summary inode\n"));
3234                 }
3235         }
3236
3237         if (!no_modify)  {
3238                 do_log(
3239 _("        - resetting contents of realtime bitmap and summary inodes\n"));
3240                 if (fill_rbmino(mp))  {
3241                         do_warn(
3242                         _("Warning:  realtime bitmap may be inconsistent\n"));
3243                 }
3244
3245                 if (fill_rsumino(mp))  {
3246                         do_warn(
3247                         _("Warning:  realtime bitmap may be inconsistent\n"));
3248                 }
3249         }
3250
3251         mark_standalone_inodes(mp);
3252
3253         do_log(_("        - traversing filesystem ...\n"));
3254
3255         irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
3256                                 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
3257
3258         /*
3259          * we always have a root inode, even if it's free...
3260          * if the root is free, forget it, lost+found is already gone
3261          */
3262         if (is_inode_free(irec, 0) || !inode_isadir(irec, 0))  {
3263                 need_root_inode = 1;
3264         }
3265
3266         /*
3267          * then process all inodes by walking incore inode tree
3268          */
3269         traverse_ags(mp);
3270
3271         /*
3272          * any directories that had updated ".." entries, rebuild them now
3273          */
3274         update_missing_dotdot_entries(mp);
3275
3276         do_log(_("        - traversal finished ...\n"));
3277         do_log(_("        - moving disconnected inodes to %s ...\n"),
3278                 ORPHANAGE);
3279
3280         /*
3281          * move all disconnected inodes to the orphanage
3282          */
3283         for (i = 0; i < glob_agcount; i++)  {
3284                 irec = findfirst_inode_rec(i);
3285                 while (irec != NULL)  {
3286                         check_for_orphaned_inodes(mp, i, irec);
3287                         irec = next_ino_rec(irec);
3288                 }
3289         }
3290 }