libxfs/util.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
   4  * All Rights Reserved.
   5  */
   6
   7 #include "libxfs_priv.h"
   8 #include "libxfs_io.h"
   9 #include "init.h"
  10 #include "xfs_fs.h"
  11 #include "xfs_shared.h"
  12 #include "xfs_format.h"
  13 #include "xfs_log_format.h"
  14 #include "xfs_trans_resv.h"
  15 #include "xfs_mount.h"
  16 #include "xfs_defer.h"
  17 #include "xfs_inode_buf.h"
  18 #include "xfs_inode_fork.h"
  19 #include "xfs_inode.h"
  20 #include "xfs_trans.h"
  21 #include "xfs_bmap.h"
  22 #include "xfs_bmap_btree.h"
  23 #include "xfs_trans_space.h"
  24 #include "xfs_ialloc.h"
  25 #include "xfs_alloc.h"
  26 #include "xfs_bit.h"
  27 #include "xfs_da_format.h"
  28 #include "xfs_da_btree.h"
  29 #include "xfs_dir2_priv.h"
  30
  31 /*
  32  * Calculate the worst case log unit reservation for a given superblock
  33  * configuration. Copied and munged from the kernel code, and assumes a
  34  * worse case header usage (maximum log buffer sizes)
  35  */
  36 int
  37 xfs_log_calc_unit_res(
  38         struct xfs_mount        *mp,
  39         int                     unit_bytes)
  40 {
  41         int                     iclog_space;
  42         int                     iclog_header_size;
  43         int                     iclog_size;
  44         uint                    num_headers;
  45
  46         if (xfs_sb_version_haslogv2(&mp->m_sb)) {
  47                 iclog_size = XLOG_MAX_RECORD_BSIZE;
  48                 iclog_header_size = BBTOB(iclog_size / XLOG_HEADER_CYCLE_SIZE);
  49         } else {
  50                 iclog_size = XLOG_BIG_RECORD_BSIZE;
  51                 iclog_header_size = BBSIZE;
  52         }
  53
  54         /*
  55          * Permanent reservations have up to 'cnt'-1 active log operations
  56          * in the log.  A unit in this case is the amount of space for one
  57          * of these log operations.  Normal reservations have a cnt of 1
  58          * and their unit amount is the total amount of space required.
  59          *
  60          * The following lines of code account for non-transaction data
  61          * which occupy space in the on-disk log.
  62          *
  63          * Normal form of a transaction is:
  64          * <oph><trans-hdr><start-oph><reg1-oph><reg1><reg2-oph>...<commit-oph>
  65          * and then there are LR hdrs, split-recs and roundoff at end of syncs.
  66          *
  67          * We need to account for all the leadup data and trailer data
  68          * around the transaction data.
  69          * And then we need to account for the worst case in terms of using
  70          * more space.
  71          * The worst case will happen if:
  72          * - the placement of the transaction happens to be such that the
  73          *   roundoff is at its maximum
  74          * - the transaction data is synced before the commit record is synced
  75          *   i.e. <transaction-data><roundoff> | <commit-rec><roundoff>
  76          *   Therefore the commit record is in its own Log Record.
  77          *   This can happen as the commit record is called with its
  78          *   own region to xlog_write().
  79          *   This then means that in the worst case, roundoff can happen for
  80          *   the commit-rec as well.
  81          *   The commit-rec is smaller than padding in this scenario and so it is
  82          *   not added separately.
  83          */
  84
  85         /* for trans header */
  86         unit_bytes += sizeof(xlog_op_header_t);
  87         unit_bytes += sizeof(xfs_trans_header_t);
  88
  89         /* for start-rec */
  90         unit_bytes += sizeof(xlog_op_header_t);
  91
  92         /*
  93          * for LR headers - the space for data in an iclog is the size minus
  94          * the space used for the headers. If we use the iclog size, then we
  95          * undercalculate the number of headers required.
  96          *
  97          * Furthermore - the addition of op headers for split-recs might
  98          * increase the space required enough to require more log and op
  99          * headers, so take that into account too.
 100          *
 101          * IMPORTANT: This reservation makes the assumption that if this
 102          * transaction is the first in an iclog and hence has the LR headers
 103          * accounted to it, then the remaining space in the iclog is
 104          * exclusively for this transaction.  i.e. if the transaction is larger
 105          * than the iclog, it will be the only thing in that iclog.
 106          * Fundamentally, this means we must pass the entire log vector to
 107          * xlog_write to guarantee this.
 108          */
 109         iclog_space = iclog_size - iclog_header_size;
 110         num_headers = howmany(unit_bytes, iclog_space);
 111
 112         /* for split-recs - ophdrs added when data split over LRs */
 113         unit_bytes += sizeof(xlog_op_header_t) * num_headers;
 114
 115         /* add extra header reservations if we overrun */
 116         while (!num_headers ||
 117                howmany(unit_bytes, iclog_space) > num_headers) {
 118                 unit_bytes += sizeof(xlog_op_header_t);
 119                 num_headers++;
 120         }
 121         unit_bytes += iclog_header_size * num_headers;
 122
 123         /* for commit-rec LR header - note: padding will subsume the ophdr */
 124         unit_bytes += iclog_header_size;
 125
 126         /* for roundoff padding for transaction data and one for commit record */
 127         if (xfs_sb_version_haslogv2(&mp->m_sb) && mp->m_sb.sb_logsunit > 1) {
 128                 /* log su roundoff */
 129                 unit_bytes += 2 * mp->m_sb.sb_logsunit;
 130         } else {
 131                 /* BB roundoff */
 132                 unit_bytes += 2 * BBSIZE;
 133         }
 134
 135         return unit_bytes;
 136 }
 137
 138 /*
 139  * Change the requested timestamp in the given inode.
 140  *
 141  * This was once shared with the kernel, but has diverged to the point
 142  * where it's no longer worth the hassle of maintaining common code.
 143  */
 144 void
 145 libxfs_trans_ichgtime(
 146         struct xfs_trans        *tp,
 147         struct xfs_inode        *ip,
 148         int                     flags)
 149 {
 150         struct timespec tv;
 151         struct timeval  stv;
 152
 153         gettimeofday(&stv, (struct timezone *)0);
 154         tv.tv_sec = stv.tv_sec;
 155         tv.tv_nsec = stv.tv_usec * 1000;
 156         if (flags & XFS_ICHGTIME_MOD)
 157                 VFS_I(ip)->i_mtime = tv;
 158         if (flags & XFS_ICHGTIME_CHG)
 159                 VFS_I(ip)->i_ctime = tv;
 160         if (flags & XFS_ICHGTIME_CREATE) {
 161                 ip->i_d.di_crtime.t_sec = (int32_t)tv.tv_sec;
 162                 ip->i_d.di_crtime.t_nsec = (int32_t)tv.tv_nsec;
 163         }
 164 }
 165
 166 STATIC uint16_t
 167 xfs_flags2diflags(
 168         struct xfs_inode        *ip,
 169         unsigned int            xflags)
 170 {
 171         /* can't set PREALLOC this way, just preserve it */
 172         uint16_t                di_flags =
 173                 (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
 174
 175         if (xflags & FS_XFLAG_IMMUTABLE)
 176                 di_flags |= XFS_DIFLAG_IMMUTABLE;
 177         if (xflags & FS_XFLAG_APPEND)
 178                 di_flags |= XFS_DIFLAG_APPEND;
 179         if (xflags & FS_XFLAG_SYNC)
 180                 di_flags |= XFS_DIFLAG_SYNC;
 181         if (xflags & FS_XFLAG_NOATIME)
 182                 di_flags |= XFS_DIFLAG_NOATIME;
 183         if (xflags & FS_XFLAG_NODUMP)
 184                 di_flags |= XFS_DIFLAG_NODUMP;
 185         if (xflags & FS_XFLAG_NODEFRAG)
 186                 di_flags |= XFS_DIFLAG_NODEFRAG;
 187         if (xflags & FS_XFLAG_FILESTREAM)
 188                 di_flags |= XFS_DIFLAG_FILESTREAM;
 189         if (S_ISDIR(VFS_I(ip)->i_mode)) {
 190                 if (xflags & FS_XFLAG_RTINHERIT)
 191                         di_flags |= XFS_DIFLAG_RTINHERIT;
 192                 if (xflags & FS_XFLAG_NOSYMLINKS)
 193                         di_flags |= XFS_DIFLAG_NOSYMLINKS;
 194                 if (xflags & FS_XFLAG_EXTSZINHERIT)
 195                         di_flags |= XFS_DIFLAG_EXTSZINHERIT;
 196                 if (xflags & FS_XFLAG_PROJINHERIT)
 197                         di_flags |= XFS_DIFLAG_PROJINHERIT;
 198         } else if (S_ISREG(VFS_I(ip)->i_mode)) {
 199                 if (xflags & FS_XFLAG_REALTIME)
 200                         di_flags |= XFS_DIFLAG_REALTIME;
 201                 if (xflags & FS_XFLAG_EXTSIZE)
 202                         di_flags |= XFS_DIFLAG_EXTSIZE;
 203         }
 204
 205         return di_flags;
 206 }
 207
 208 STATIC uint64_t
 209 xfs_flags2diflags2(
 210         struct xfs_inode        *ip,
 211         unsigned int            xflags)
 212 {
 213         uint64_t                di_flags2 =
 214                 (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK);
 215
 216         if (xflags & FS_XFLAG_DAX)
 217                 di_flags2 |= XFS_DIFLAG2_DAX;
 218         if (xflags & FS_XFLAG_COWEXTSIZE)
 219                 di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
 220
 221         return di_flags2;
 222 }
 223
/*
 * Allocate an inode on disk and return a copy of its in-core version.
 * Set mode, nlink, and rdev appropriately within the inode.
 * The uid and gid for the inode are set according to the contents of
 * the given cred structure.
 *
 * This was once shared with the kernel, but has diverged to the point
 * where it's no longer worth the hassle of maintaining common code.
 *
 * @tp:             transaction the allocation and logging happen in
 * @pip:            parent inode, or NULL; when NULL the project id, extent
 *                  size and flags are taken from @fsx instead
 * @mode:           file type and permission bits for the new inode
 * @nlink:          initial link count
 * @rdev:           device number; forced to 0 for FIFOs and sockets
 * @cr:             credentials supplying the uid and gid (always dereferenced)
 * @fsx:            extended attributes; only dereferenced when @pip is NULL
 * @ialloc_context: in/out buffer pointer handed to xfs_dialloc()
 * @ipp:            out: the new inode, or NULL (see below)
 *
 * Returns 0 or the error from xfs_dialloc()/xfs_trans_iget().  A return of
 * 0 with *ipp == NULL means xfs_dialloc() either left *ialloc_context set
 * or reported NULLFSINO; no inode was set up in that case.
 */
int
libxfs_ialloc(
	xfs_trans_t	*tp,
	xfs_inode_t	*pip,
	mode_t		mode,
	nlink_t		nlink,
	xfs_dev_t	rdev,
	struct cred	*cr,
	struct fsxattr	*fsx,
	xfs_buf_t	**ialloc_context,
	xfs_inode_t	**ipp)
{
	xfs_ino_t	ino;
	xfs_inode_t	*ip;
	uint		flags;
	int		error;

	/*
	 * Call the space management code to pick
	 * the on-disk inode to be allocated.
	 */
	error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode,
			    ialloc_context, &ino);
	if (error != 0)
		return error;
	/* Nothing to set up yet: hand back NULL and let the caller decide. */
	if (*ialloc_context || ino == NULLFSINO) {
		*ipp = NULL;
		return 0;
	}
	ASSERT(*ialloc_context == NULL);

	error = xfs_trans_iget(tp->t_mountp, tp, ino, 0, 0, &ip);
	if (error != 0)
		return error;
	ASSERT(ip != NULL);

	/* Basic identity: mode, link count, ownership, project id, times. */
	VFS_I(ip)->i_mode = mode;
	set_nlink(VFS_I(ip), nlink);
	ip->i_d.di_uid = cr->cr_uid;
	ip->i_d.di_gid = cr->cr_gid;
	xfs_set_projid(&ip->i_d, pip ? 0 : fsx->fsx_projid);
	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD);

	/*
	 * We only support filesystems that understand v2 format inodes. So if
	 * this is currently an old format inode, then change the inode version
	 * number now.  This way we only do the conversion here rather than here
	 * and in the flush/logging code.
	 */
	if (ip->i_d.di_version == 1) {
		ip->i_d.di_version = 2;
		/*
		 * old link count, projid_lo/hi field, pad field
		 * already zeroed
		 */
	}

	/*
	 * A setgid parent passes its group on to the child, and a new
	 * directory additionally inherits the setgid bit itself.
	 * NOTE(review): the inner S_ISGID test repeats the outer condition.
	 */
	if (pip && (VFS_I(pip)->i_mode & S_ISGID)) {
		ip->i_d.di_gid = pip->i_d.di_gid;
		if ((VFS_I(pip)->i_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR)
			VFS_I(ip)->i_mode |= S_ISGID;
	}

	/* Start from an empty file; fsx values apply only without a parent. */
	ip->i_d.di_size = 0;
	ip->i_d.di_nextents = 0;
	ASSERT(ip->i_d.di_nblocks == 0);
	ip->i_d.di_extsize = pip ? 0 : fsx->fsx_extsize;
	ip->i_d.di_dmevmask = 0;
	ip->i_d.di_dmstate = 0;
	ip->i_d.di_flags = pip ? 0 : xfs_flags2diflags(ip, fsx->fsx_xflags);

	/* Fields that only exist on v3 inodes. */
	if (ip->i_d.di_version == 3) {
		ASSERT(ip->i_d.di_ino == ino);
		/*
		 * NOTE(review): no 'mp' is declared in this function, so the
		 * assertion below presumably only builds when ASSERT() throws
		 * its argument away - confirm.
		 */
		ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_meta_uuid));
		VFS_I(ip)->i_version = 1;
		ip->i_d.di_flags2 = pip ? 0 : xfs_flags2diflags2(ip,
				fsx->fsx_xflags);
		/* creation time is copied from the mtime stamped above */
		ip->i_d.di_crtime.t_sec = (int32_t)VFS_I(ip)->i_mtime.tv_sec;
		ip->i_d.di_crtime.t_nsec = (int32_t)VFS_I(ip)->i_mtime.tv_nsec;
		ip->i_d.di_cowextsize = pip ? 0 : fsx->fsx_cowextsize;
	}

	/* Pick the data fork format, and what to log, from the file type. */
	flags = XFS_ILOG_CORE;
	switch (mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		/* doesn't make sense to set an rdev for these */
		rdev = 0;
		/* FALLTHROUGH */
	case S_IFCHR:
	case S_IFBLK:
		ip->i_d.di_format = XFS_DINODE_FMT_DEV;
		flags |= XFS_ILOG_DEV;
		VFS_I(ip)->i_rdev = rdev;
		break;
	case S_IFREG:
	case S_IFDIR:
		/*
		 * Inherit flags from the parent.  A directory keeps the
		 * *INHERIT flags so they propagate further; any other file
		 * gets the corresponding effective flag (REALTIME, EXTSIZE).
		 */
		if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
			uint	di_flags = 0;

			if ((mode & S_IFMT) == S_IFDIR) {
				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
					di_flags |= XFS_DIFLAG_RTINHERIT;
				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
					di_flags |= XFS_DIFLAG_EXTSZINHERIT;
					ip->i_d.di_extsize = pip->i_d.di_extsize;
				}
			} else {
				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) {
					di_flags |= XFS_DIFLAG_REALTIME;
				}
				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
					di_flags |= XFS_DIFLAG_EXTSIZE;
					ip->i_d.di_extsize = pip->i_d.di_extsize;
				}
			}
			if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
				di_flags |= XFS_DIFLAG_PROJINHERIT;
			ip->i_d.di_flags |= di_flags;
		}
		/* FALLTHROUGH */
	case S_IFLNK:
		/* regular files, directories and symlinks start extent-based and empty */
		ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
		ip->i_df.if_flags = XFS_IFEXTENTS;
		ip->i_df.if_bytes = 0;
		ip->i_df.if_u1.if_root = NULL;
		break;
	default:
		ASSERT(0);
	}
	/* Attribute fork settings for new inode. */
	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
	ip->i_d.di_anextents = 0;

	/*
	 * set up the inode ops structure that the libxfs code relies on
	 */
	if (XFS_ISDIR(ip))
		ip->d_ops = ip->i_mount->m_dir_inode_ops;
	else
		ip->d_ops = ip->i_mount->m_nondir_inode_ops;

	/*
	 * Log the new values stuffed into the inode.
	 */
	xfs_trans_log_inode(tp, ip, flags);
	*ipp = ip;
	return 0;
}
 382
 383 void
 384 libxfs_iprint(
 385         xfs_inode_t             *ip)
 386 {
 387         struct xfs_icdinode     *dip;
 388         xfs_extnum_t            i = 0;
 389         struct xfs_ifork        *ifp;           /* inode fork pointer */
 390         struct xfs_iext_cursor  icur;
 391         xfs_bmbt_irec_t         rec;
 392
 393         printf("Inode %lx\n", (unsigned long)ip);
 394         printf("    i_ino %llx\n", (unsigned long long)ip->i_ino);
 395
 396         if (ip->i_df.if_flags & XFS_IFEXTENTS)
 397                 printf("EXTENTS ");
 398         printf("\n");
 399         printf("    i_df.if_bytes %d\n", ip->i_df.if_bytes);
 400         printf("    i_df.if_u1.if_root/if_data %lx\n",
 401                 (unsigned long)ip->i_df.if_u1.if_root);
 402         if (ip->i_df.if_flags & XFS_IFEXTENTS) {
 403                 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
 404                 for_each_xfs_iext(ifp, &icur, &rec) {
 405                         printf("\t%d: startoff %llu, startblock 0x%llx,"
 406                                 " blockcount %llu, state %d\n",
 407                                 i, (unsigned long long)rec.br_startoff,
 408                                 (unsigned long long)rec.br_startblock,
 409                                 (unsigned long long)rec.br_blockcount,
 410                                 (int)rec.br_state);
 411                         i++;
 412                 }
 413         }
 414         printf("    i_df.if_broot %lx\n", (unsigned long)ip->i_df.if_broot);
 415         printf("    i_df.if_broot_bytes %x\n", ip->i_df.if_broot_bytes);
 416
 417         dip = &ip->i_d;
 418         printf("\nOn disk portion\n");
 419         printf("    di_mode %o\n", VFS_I(ip)->i_mode);
 420         printf("    di_version %x\n", (uint)dip->di_version);
 421         switch (ip->i_d.di_format) {
 422         case XFS_DINODE_FMT_LOCAL:
 423                 printf("    Inline inode\n");
 424                 break;
 425         case XFS_DINODE_FMT_EXTENTS:
 426                 printf("    Extents inode\n");
 427                 break;
 428         case XFS_DINODE_FMT_BTREE:
 429                 printf("    B-tree inode\n");
 430                 break;
 431         default:
 432                 printf("    Other inode\n");
 433                 break;
 434         }
 435         printf("   di_nlink %x\n", VFS_I(ip)->i_nlink);
 436         printf("   di_uid %d\n", dip->di_uid);
 437         printf("   di_gid %d\n", dip->di_gid);
 438         printf("   di_nextents %d\n", dip->di_nextents);
 439         printf("   di_size %llu\n", (unsigned long long)dip->di_size);
 440         printf("   di_gen %x\n", VFS_I(ip)->i_generation);
 441         printf("   di_extsize %d\n", dip->di_extsize);
 442         printf("   di_flags %x\n", dip->di_flags);
 443         printf("   di_nblocks %llu\n", (unsigned long long)dip->di_nblocks);
 444 }
 445
 446 /*
 447  * Writes a modified inode's changes out to the inode's on disk home.
 448  * Originally based on xfs_iflush_int() from xfs_inode.c in the kernel.
 449  */
 450 int
 451 libxfs_iflush_int(xfs_inode_t *ip, xfs_buf_t *bp)
 452 {
 453         xfs_inode_log_item_t    *iip;
 454         xfs_dinode_t            *dip;
 455         xfs_mount_t             *mp;
 456
 457         ASSERT(bp-b_log_item != NULL);
 458         ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
 459                 ip->i_d.di_nextents > ip->i_df.if_ext_max);
 460         ASSERT(ip->i_d.di_version > 1);
 461
 462         iip = ip->i_itemp;
 463         mp = ip->i_mount;
 464
 465         /* set *dip = inode's place in the buffer */
 466         dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);
 467
 468         ASSERT(ip->i_d.di_magic == XFS_DINODE_MAGIC);
 469         if (XFS_ISREG(ip)) {
 470                 ASSERT( (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS) ||
 471                         (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) );
 472         } else if (XFS_ISDIR(ip)) {
 473                 ASSERT( (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS) ||
 474                         (ip->i_d.di_format == XFS_DINODE_FMT_BTREE)   ||
 475                         (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) );
 476         }
 477         ASSERT(ip->i_d.di_nextents+ip->i_d.di_anextents <= ip->i_d.di_nblocks);
 478         ASSERT(ip->i_d.di_forkoff <= mp->m_sb.sb_inodesize);
 479
 480         /* bump the change count on v3 inodes */
 481         if (ip->i_d.di_version == 3)
 482                 VFS_I(ip)->i_version++;
 483
 484         /* Check the inline fork data before we write out. */
 485         if (!libxfs_inode_verify_forks(ip, &xfs_default_ifork_ops))
 486                 return -EFSCORRUPTED;
 487
 488         /*
 489          * Copy the dirty parts of the inode into the on-disk
 490          * inode.  We always copy out the core of the inode,
 491          * because if the inode is dirty at all the core must
 492          * be.
 493          */
 494         xfs_inode_to_disk(ip, dip, iip->ili_item.li_lsn);
 495
 496         xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
 497         if (XFS_IFORK_Q(ip))
 498                 xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
 499
 500         /* generate the checksum. */
 501         xfs_dinode_calc_crc(mp, dip);
 502
 503         return 0;
 504 }
 505
 506 int
 507 libxfs_mod_incore_sb(
 508         struct xfs_mount *mp,
 509         int             field,
 510         int64_t         delta,
 511         int             rsvd)
 512 {
 513         long long       lcounter;       /* long counter for 64 bit fields */
 514
 515         switch (field) {
 516         case XFS_TRANS_SB_FDBLOCKS:
 517                 lcounter = (long long)mp->m_sb.sb_fdblocks;
 518                 lcounter += delta;
 519                 if (lcounter < 0)
 520                         return -ENOSPC;
 521                 mp->m_sb.sb_fdblocks = lcounter;
 522                 return 0;
 523         default:
 524                 ASSERT(0);
 525                 return -EINVAL;
 526         }
 527 }
 528
 529 /*
 530  * This routine allocates disk space for the given file.
 531  * Originally derived from xfs_alloc_file_space().
 532  */
 533 int
 534 libxfs_alloc_file_space(
 535         xfs_inode_t     *ip,
 536         xfs_off_t       offset,
 537         xfs_off_t       len,
 538         int             alloc_type,
 539         int             attr_flags)
 540 {
 541         xfs_mount_t     *mp;
 542         xfs_off_t       count;
 543         xfs_filblks_t   datablocks;
 544         xfs_filblks_t   allocated_fsb;
 545         xfs_filblks_t   allocatesize_fsb;
 546         xfs_bmbt_irec_t *imapp;
 547         xfs_bmbt_irec_t imaps[1];
 548         int             reccount;
 549         uint            resblks;
 550         xfs_fileoff_t   startoffset_fsb;
 551         xfs_trans_t     *tp;
 552         int             xfs_bmapi_flags;
 553         int             error;
 554
 555         if (len <= 0)
 556                 return -EINVAL;
 557
 558         count = len;
 559         error = 0;
 560         imapp = &imaps[0];
 561         reccount = 1;
 562         xfs_bmapi_flags = alloc_type ? XFS_BMAPI_PREALLOC : 0;
 563         mp = ip->i_mount;
 564         startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
 565         allocatesize_fsb = XFS_B_TO_FSB(mp, count);
 566
 567         /* allocate file space until done or until there is an error */
 568         while (allocatesize_fsb && !error) {
 569                 datablocks = allocatesize_fsb;
 570
 571                 resblks = (uint)XFS_DIOSTRAT_SPACE_RES(mp, datablocks);
 572                 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
 573                                         0, 0, &tp);
 574                 /*
 575                  * Check for running out of space
 576                  */
 577                 if (error) {
 578                         ASSERT(error == -ENOSPC);
 579                         break;
 580                 }
 581                 xfs_trans_ijoin(tp, ip, 0);
 582
 583                 error = xfs_bmapi_write(tp, ip, startoffset_fsb, allocatesize_fsb,
 584                                 xfs_bmapi_flags, 0, imapp, &reccount);
 585
 586                 if (error)
 587                         goto error0;
 588
 589                 /*
 590                  * Complete the transaction
 591                  */
 592                 error = xfs_trans_commit(tp);
 593                 if (error)
 594                         break;
 595
 596                 allocated_fsb = imapp->br_blockcount;
 597                 if (reccount == 0)
 598                         return -ENOSPC;
 599
 600                 startoffset_fsb += allocated_fsb;
 601                 allocatesize_fsb -= allocated_fsb;
 602         }
 603         return error;
 604
 605 error0: /* Cancel bmap, cancel trans */
 606         xfs_trans_cancel(tp);
 607         return error;
 608 }
 609
 610 /*
 611  * Wrapper around call to libxfs_ialloc. Takes care of committing and
 612  * allocating a new transaction as needed.
 613  *
 614  * Originally there were two copies of this code - one in mkfs, the
 615  * other in repair - now there is just the one.
 616  */
 617 int
 618 libxfs_inode_alloc(
 619         xfs_trans_t     **tp,
 620         xfs_inode_t     *pip,
 621         mode_t          mode,
 622         nlink_t         nlink,
 623         xfs_dev_t       rdev,
 624         struct cred     *cr,
 625         struct fsxattr  *fsx,
 626         xfs_inode_t     **ipp)
 627 {
 628         xfs_buf_t       *ialloc_context;
 629         xfs_inode_t     *ip;
 630         int             error;
 631
 632         ialloc_context = (xfs_buf_t *)0;
 633         error = libxfs_ialloc(*tp, pip, mode, nlink, rdev, cr, fsx,
 634                            &ialloc_context, &ip);
 635         if (error) {
 636                 *ipp = NULL;
 637                 return error;
 638         }
 639         if (!ialloc_context && !ip) {
 640                 *ipp = NULL;
 641                 return -ENOSPC;
 642         }
 643
 644         if (ialloc_context) {
 645
 646                 xfs_trans_bhold(*tp, ialloc_context);
 647
 648                 error = xfs_trans_roll(tp);
 649                 if (error) {
 650                         fprintf(stderr, _("%s: cannot duplicate transaction: %s\n"),
 651                                 progname, strerror(error));
 652                         exit(1);
 653                 }
 654                 xfs_trans_bjoin(*tp, ialloc_context);
 655                 error = libxfs_ialloc(*tp, pip, mode, nlink, rdev, cr,
 656                                    fsx, &ialloc_context, &ip);
 657                 if (!ip)
 658                         error = -ENOSPC;
 659                 if (error)
 660                         return error;
 661         }
 662
 663         *ipp = ip;
 664         return error;
 665 }
 666
 667 /*
 668  * Userspace versions of common diagnostic routines (varargs fun).
 669  */
 670 void
 671 libxfs_fs_repair_cmn_err(int level, xfs_mount_t *mp, char *fmt, ...)
 672 {
 673         va_list ap;
 674
 675         va_start(ap, fmt);
 676         vfprintf(stderr, fmt, ap);
 677         fprintf(stderr, "  This is a bug.\n");
 678         fprintf(stderr, "%s version %s\n", progname, VERSION);
 679         fprintf(stderr,
 680                 "Please capture the filesystem metadata with xfs_metadump and\n"
 681                 "report it to linux-xfs@vger.kernel.org\n");
 682         va_end(ap);
 683 }
 684
 685 void
 686 libxfs_fs_cmn_err(int level, xfs_mount_t *mp, char *fmt, ...)
 687 {
 688         va_list ap;
 689
 690         va_start(ap, fmt);
 691         vfprintf(stderr, fmt, ap);
 692         fputs("\n", stderr);
 693         va_end(ap);
 694 }
 695
 696 void
 697 cmn_err(int level, char *fmt, ...)
 698 {
 699         va_list ap;
 700
 701         va_start(ap, fmt);
 702         vfprintf(stderr, fmt, ap);
 703         fputs("\n", stderr);
 704         va_end(ap);
 705 }
 706
 707 /*
 708  * Warnings specifically for verifier errors.  Differentiate CRC vs. invalid
 709  * values, and omit the stack trace unless the error level is tuned high.
 710  */
 711 void
 712 xfs_verifier_error(
 713         struct xfs_buf          *bp,
 714         int                     error,
 715         xfs_failaddr_t          failaddr)
 716 {
 717         xfs_buf_ioerror(bp, error);
 718
 719         xfs_alert(NULL, "Metadata %s detected at %p, %s block 0x%llx/0x%x",
 720                   bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
 721                   failaddr ? failaddr : __return_address,
 722                   bp->b_ops->name, bp->b_bn, BBTOB(bp->b_length));
 723 }
 724
 725 /*
 726  * Warnings for inode corruption problems.  Don't bother with the stack
 727  * trace unless the error level is turned up high.
 728  */
 729 void
 730 xfs_inode_verifier_error(
 731         struct xfs_inode        *ip,
 732         int                     error,
 733         const char              *name,
 734         void                    *buf,
 735         size_t                  bufsz,
 736         xfs_failaddr_t          failaddr)
 737 {
 738         xfs_alert(NULL, "Metadata %s detected at %p, inode 0x%llx %s",
 739                   error == -EFSBADCRC ? "CRC error" : "corruption",
 740                   failaddr ? failaddr : __return_address,
 741                   ip->i_ino, name);
 742 }
 743
 744 /*
 745  * This is called from I/O verifiers on v5 superblock filesystems. In the
 746  * kernel, it validates the metadata LSN parameter against the current LSN of
 747  * the active log. We don't have an active log in userspace so this kind of
 748  * validation is not required. Therefore, this function always returns true in
 749  * userspace.
 750  *
 751  * xfs_repair piggybacks off this mechanism to help track the largest metadata
 752  * LSN in use on a filesystem. Keep a record of the largest LSN seen such that
 753  * repair can validate it against the state of the log.
 754  */
 755 xfs_lsn_t       libxfs_max_lsn = 0;
 756 pthread_mutex_t libxfs_max_lsn_lock = PTHREAD_MUTEX_INITIALIZER;
 757
 758 bool
 759 xfs_log_check_lsn(
 760         struct xfs_mount        *mp,
 761         xfs_lsn_t               lsn)
 762 {
 763         int                     cycle = CYCLE_LSN(lsn);
 764         int                     block = BLOCK_LSN(lsn);
 765         int                     max_cycle;
 766         int                     max_block;
 767
 768         if (lsn == NULLCOMMITLSN)
 769                 return true;
 770
 771         pthread_mutex_lock(&libxfs_max_lsn_lock);
 772
 773         max_cycle = CYCLE_LSN(libxfs_max_lsn);
 774         max_block = BLOCK_LSN(libxfs_max_lsn);
 775
 776         if ((cycle > max_cycle) ||
 777             (cycle == max_cycle && block > max_block))
 778                 libxfs_max_lsn = lsn;
 779
 780         pthread_mutex_unlock(&libxfs_max_lsn_lock);
 781
 782         return true;
 783 }
 784
 785 static struct xfs_buftarg *
 786 xfs_find_bdev_for_inode(
 787         struct xfs_inode        *ip)
 788 {
 789         struct xfs_mount        *mp = ip->i_mount;
 790
 791         if (XFS_IS_REALTIME_INODE(ip))
 792                 return mp->m_rtdev_targp;
 793         return mp->m_ddev_targp;
 794 }
 795
 796 static xfs_daddr_t
 797 xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
 798 {
 799         if (XFS_IS_REALTIME_INODE(ip))
 800                  return XFS_FSB_TO_BB(ip->i_mount, fsb);
 801         return XFS_FSB_TO_DADDR(ip->i_mount, (fsb));
 802 }
 803
 804 int
 805 libxfs_zero_extent(
 806         struct xfs_inode *ip,
 807         xfs_fsblock_t   start_fsb,
 808         xfs_off_t       count_fsb)
 809 {
 810         xfs_daddr_t     sector = xfs_fsb_to_db(ip, start_fsb);
 811         ssize_t         size = XFS_FSB_TO_BB(ip->i_mount, count_fsb);
 812
 813         return libxfs_device_zero(xfs_find_bdev_for_inode(ip), sector, size);
 814 }
 815
 816 unsigned int
 817 hweight8(unsigned int w)
 818 {
 819         unsigned int res = w - ((w >> 1) & 0x55);
 820         res = (res & 0x33) + ((res >> 2) & 0x33);
 821         return (res + (res >> 4)) & 0x0F;
 822 }
 823
 824 unsigned int
 825 hweight32(unsigned int w)
 826 {
 827         unsigned int res = w - ((w >> 1) & 0x55555555);
 828         res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
 829         res = (res + (res >> 4)) & 0x0F0F0F0F;
 830         res = res + (res >> 8);
 831         return (res + (res >> 16)) & 0x000000FF;
 832 }
 833
 834 unsigned int
 835 hweight64(__u64 w)
 836 {
 837         return hweight32((unsigned int)w) +
 838                hweight32((unsigned int)(w >> 32));
 839 }
 840