libxfs/util.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
   4  * All Rights Reserved.
   5  */
   6
   7 #include "libxfs_priv.h"
   8 #include "libxfs.h"
   9 #include "libxfs_io.h"
  10 #include "init.h"
  11 #include "xfs_fs.h"
  12 #include "xfs_shared.h"
  13 #include "xfs_format.h"
  14 #include "xfs_log_format.h"
  15 #include "xfs_trans_resv.h"
  16 #include "xfs_mount.h"
  17 #include "xfs_defer.h"
  18 #include "xfs_inode_buf.h"
  19 #include "xfs_inode_fork.h"
  20 #include "xfs_inode.h"
  21 #include "xfs_trans.h"
  22 #include "xfs_bmap.h"
  23 #include "xfs_bmap_btree.h"
  24 #include "xfs_trans_space.h"
  25 #include "xfs_ialloc.h"
  26 #include "xfs_alloc.h"
  27 #include "xfs_bit.h"
  28 #include "xfs_da_format.h"
  29 #include "xfs_da_btree.h"
  30 #include "xfs_dir2_priv.h"
  31
  32 /*
  33  * Calculate the worst case log unit reservation for a given superblock
  34  * configuration. Copied and munged from the kernel code, and assumes a
  35  * worse case header usage (maximum log buffer sizes)
  36  */
  37 int
  38 xfs_log_calc_unit_res(
  39         struct xfs_mount        *mp,
  40         int                     unit_bytes)
  41 {
  42         int                     iclog_space;
  43         int                     iclog_header_size;
  44         int                     iclog_size;
  45         uint                    num_headers;
  46
  47         if (xfs_sb_version_haslogv2(&mp->m_sb)) {
  48                 iclog_size = XLOG_MAX_RECORD_BSIZE;
  49                 iclog_header_size = BBTOB(iclog_size / XLOG_HEADER_CYCLE_SIZE);
  50         } else {
  51                 iclog_size = XLOG_BIG_RECORD_BSIZE;
  52                 iclog_header_size = BBSIZE;
  53         }
  54
  55         /*
  56          * Permanent reservations have up to 'cnt'-1 active log operations
  57          * in the log.  A unit in this case is the amount of space for one
  58          * of these log operations.  Normal reservations have a cnt of 1
  59          * and their unit amount is the total amount of space required.
  60          *
  61          * The following lines of code account for non-transaction data
  62          * which occupy space in the on-disk log.
  63          *
  64          * Normal form of a transaction is:
  65          * <oph><trans-hdr><start-oph><reg1-oph><reg1><reg2-oph>...<commit-oph>
  66          * and then there are LR hdrs, split-recs and roundoff at end of syncs.
  67          *
  68          * We need to account for all the leadup data and trailer data
  69          * around the transaction data.
  70          * And then we need to account for the worst case in terms of using
  71          * more space.
  72          * The worst case will happen if:
  73          * - the placement of the transaction happens to be such that the
  74          *   roundoff is at its maximum
  75          * - the transaction data is synced before the commit record is synced
  76          *   i.e. <transaction-data><roundoff> | <commit-rec><roundoff>
  77          *   Therefore the commit record is in its own Log Record.
  78          *   This can happen as the commit record is called with its
  79          *   own region to xlog_write().
  80          *   This then means that in the worst case, roundoff can happen for
  81          *   the commit-rec as well.
  82          *   The commit-rec is smaller than padding in this scenario and so it is
  83          *   not added separately.
  84          */
  85
  86         /* for trans header */
  87         unit_bytes += sizeof(xlog_op_header_t);
  88         unit_bytes += sizeof(xfs_trans_header_t);
  89
  90         /* for start-rec */
  91         unit_bytes += sizeof(xlog_op_header_t);
  92
  93         /*
  94          * for LR headers - the space for data in an iclog is the size minus
  95          * the space used for the headers. If we use the iclog size, then we
  96          * undercalculate the number of headers required.
  97          *
  98          * Furthermore - the addition of op headers for split-recs might
  99          * increase the space required enough to require more log and op
 100          * headers, so take that into account too.
 101          *
 102          * IMPORTANT: This reservation makes the assumption that if this
 103          * transaction is the first in an iclog and hence has the LR headers
 104          * accounted to it, then the remaining space in the iclog is
 105          * exclusively for this transaction.  i.e. if the transaction is larger
 106          * than the iclog, it will be the only thing in that iclog.
 107          * Fundamentally, this means we must pass the entire log vector to
 108          * xlog_write to guarantee this.
 109          */
 110         iclog_space = iclog_size - iclog_header_size;
 111         num_headers = howmany(unit_bytes, iclog_space);
 112
 113         /* for split-recs - ophdrs added when data split over LRs */
 114         unit_bytes += sizeof(xlog_op_header_t) * num_headers;
 115
 116         /* add extra header reservations if we overrun */
 117         while (!num_headers ||
 118                howmany(unit_bytes, iclog_space) > num_headers) {
 119                 unit_bytes += sizeof(xlog_op_header_t);
 120                 num_headers++;
 121         }
 122         unit_bytes += iclog_header_size * num_headers;
 123
 124         /* for commit-rec LR header - note: padding will subsume the ophdr */
 125         unit_bytes += iclog_header_size;
 126
 127         /* for roundoff padding for transaction data and one for commit record */
 128         if (xfs_sb_version_haslogv2(&mp->m_sb) && mp->m_sb.sb_logsunit > 1) {
 129                 /* log su roundoff */
 130                 unit_bytes += 2 * mp->m_sb.sb_logsunit;
 131         } else {
 132                 /* BB roundoff */
 133                 unit_bytes += 2 * BBSIZE;
 134         }
 135
 136         return unit_bytes;
 137 }
 138
 139 /*
 140  * Change the requested timestamp in the given inode.
 141  *
 142  * This was once shared with the kernel, but has diverged to the point
 143  * where it's no longer worth the hassle of maintaining common code.
 144  */
 145 void
 146 libxfs_trans_ichgtime(
 147         struct xfs_trans        *tp,
 148         struct xfs_inode        *ip,
 149         int                     flags)
 150 {
 151         struct timespec tv;
 152         struct timeval  stv;
 153
 154         gettimeofday(&stv, (struct timezone *)0);
 155         tv.tv_sec = stv.tv_sec;
 156         tv.tv_nsec = stv.tv_usec * 1000;
 157         if (flags & XFS_ICHGTIME_MOD)
 158                 VFS_I(ip)->i_mtime = tv;
 159         if (flags & XFS_ICHGTIME_CHG)
 160                 VFS_I(ip)->i_ctime = tv;
 161         if (flags & XFS_ICHGTIME_CREATE) {
 162                 ip->i_d.di_crtime.t_sec = (int32_t)tv.tv_sec;
 163                 ip->i_d.di_crtime.t_nsec = (int32_t)tv.tv_nsec;
 164         }
 165 }
 166
 167 STATIC uint16_t
 168 xfs_flags2diflags(
 169         struct xfs_inode        *ip,
 170         unsigned int            xflags)
 171 {
 172         /* can't set PREALLOC this way, just preserve it */
 173         uint16_t                di_flags =
 174                 (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
 175
 176         if (xflags & FS_XFLAG_IMMUTABLE)
 177                 di_flags |= XFS_DIFLAG_IMMUTABLE;
 178         if (xflags & FS_XFLAG_APPEND)
 179                 di_flags |= XFS_DIFLAG_APPEND;
 180         if (xflags & FS_XFLAG_SYNC)
 181                 di_flags |= XFS_DIFLAG_SYNC;
 182         if (xflags & FS_XFLAG_NOATIME)
 183                 di_flags |= XFS_DIFLAG_NOATIME;
 184         if (xflags & FS_XFLAG_NODUMP)
 185                 di_flags |= XFS_DIFLAG_NODUMP;
 186         if (xflags & FS_XFLAG_NODEFRAG)
 187                 di_flags |= XFS_DIFLAG_NODEFRAG;
 188         if (xflags & FS_XFLAG_FILESTREAM)
 189                 di_flags |= XFS_DIFLAG_FILESTREAM;
 190         if (S_ISDIR(VFS_I(ip)->i_mode)) {
 191                 if (xflags & FS_XFLAG_RTINHERIT)
 192                         di_flags |= XFS_DIFLAG_RTINHERIT;
 193                 if (xflags & FS_XFLAG_NOSYMLINKS)
 194                         di_flags |= XFS_DIFLAG_NOSYMLINKS;
 195                 if (xflags & FS_XFLAG_EXTSZINHERIT)
 196                         di_flags |= XFS_DIFLAG_EXTSZINHERIT;
 197                 if (xflags & FS_XFLAG_PROJINHERIT)
 198                         di_flags |= XFS_DIFLAG_PROJINHERIT;
 199         } else if (S_ISREG(VFS_I(ip)->i_mode)) {
 200                 if (xflags & FS_XFLAG_REALTIME)
 201                         di_flags |= XFS_DIFLAG_REALTIME;
 202                 if (xflags & FS_XFLAG_EXTSIZE)
 203                         di_flags |= XFS_DIFLAG_EXTSIZE;
 204         }
 205
 206         return di_flags;
 207 }
 208
 209 STATIC uint64_t
 210 xfs_flags2diflags2(
 211         struct xfs_inode        *ip,
 212         unsigned int            xflags)
 213 {
 214         uint64_t                di_flags2 =
 215                 (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK);
 216
 217         if (xflags & FS_XFLAG_DAX)
 218                 di_flags2 |= XFS_DIFLAG2_DAX;
 219         if (xflags & FS_XFLAG_COWEXTSIZE)
 220                 di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
 221
 222         return di_flags2;
 223 }
 224
 225 /*
 226  * Allocate an inode on disk and return a copy of its in-core version.
 227  * Set mode, nlink, and rdev appropriately within the inode.
 228  * The uid and gid for the inode are set according to the contents of
 229  * the given cred structure.
 230  *
 231  * This was once shared with the kernel, but has diverged to the point
 232  * where it's no longer worth the hassle of maintaining common code.
 233  */
 234 static int
 235 libxfs_ialloc(
 236         xfs_trans_t     *tp,
 237         xfs_inode_t     *pip,
 238         mode_t          mode,
 239         nlink_t         nlink,
 240         xfs_dev_t       rdev,
 241         struct cred     *cr,
 242         struct fsxattr  *fsx,
 243         xfs_buf_t       **ialloc_context,
 244         xfs_inode_t     **ipp)
 245 {
 246         xfs_ino_t       ino;
 247         xfs_inode_t     *ip;
 248         uint            flags;
 249         int             error;
 250
 251         /*
 252          * Call the space management code to pick
 253          * the on-disk inode to be allocated.
 254          */
 255         error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode,
 256                             ialloc_context, &ino);
 257         if (error != 0)
 258                 return error;
 259         if (*ialloc_context || ino == NULLFSINO) {
 260                 *ipp = NULL;
 261                 return 0;
 262         }
 263         ASSERT(*ialloc_context == NULL);
 264
 265         error = libxfs_iget(tp->t_mountp, tp, ino, 0, &ip,
 266                         &xfs_default_ifork_ops);
 267         if (error != 0)
 268                 return error;
 269         ASSERT(ip != NULL);
 270
 271         VFS_I(ip)->i_mode = mode;
 272         set_nlink(VFS_I(ip), nlink);
 273         ip->i_d.di_uid = cr->cr_uid;
 274         ip->i_d.di_gid = cr->cr_gid;
 275         xfs_set_projid(&ip->i_d, pip ? 0 : fsx->fsx_projid);
 276         xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD);
 277
 278         /*
 279          * We only support filesystems that understand v2 format inodes. So if
 280          * this is currently an old format inode, then change the inode version
 281          * number now.  This way we only do the conversion here rather than here
 282          * and in the flush/logging code.
 283          */
 284         if (ip->i_d.di_version == 1) {
 285                 ip->i_d.di_version = 2;
 286                 /*
 287                  * old link count, projid_lo/hi field, pad field
 288                  * already zeroed
 289                  */
 290         }
 291
 292         if (pip && (VFS_I(pip)->i_mode & S_ISGID)) {
 293                 ip->i_d.di_gid = pip->i_d.di_gid;
 294                 if ((VFS_I(pip)->i_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR)
 295                         VFS_I(ip)->i_mode |= S_ISGID;
 296         }
 297
 298         ip->i_d.di_size = 0;
 299         ip->i_d.di_nextents = 0;
 300         ASSERT(ip->i_d.di_nblocks == 0);
 301         ip->i_d.di_extsize = pip ? 0 : fsx->fsx_extsize;
 302         ip->i_d.di_dmevmask = 0;
 303         ip->i_d.di_dmstate = 0;
 304         ip->i_d.di_flags = pip ? 0 : xfs_flags2diflags(ip, fsx->fsx_xflags);
 305
 306         if (ip->i_d.di_version == 3) {
 307                 ASSERT(ip->i_d.di_ino == ino);
 308                 ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_meta_uuid));
 309                 VFS_I(ip)->i_version = 1;
 310                 ip->i_d.di_flags2 = pip ? 0 : xfs_flags2diflags2(ip,
 311                                 fsx->fsx_xflags);
 312                 ip->i_d.di_crtime.t_sec = (int32_t)VFS_I(ip)->i_mtime.tv_sec;
 313                 ip->i_d.di_crtime.t_nsec = (int32_t)VFS_I(ip)->i_mtime.tv_nsec;
 314                 ip->i_d.di_cowextsize = pip ? 0 : fsx->fsx_cowextsize;
 315         }
 316
 317         flags = XFS_ILOG_CORE;
 318         switch (mode & S_IFMT) {
 319         case S_IFIFO:
 320         case S_IFSOCK:
 321                 /* doesn't make sense to set an rdev for these */
 322                 rdev = 0;
 323                 /* FALLTHROUGH */
 324         case S_IFCHR:
 325         case S_IFBLK:
 326                 ip->i_d.di_format = XFS_DINODE_FMT_DEV;
 327                 flags |= XFS_ILOG_DEV;
 328                 VFS_I(ip)->i_rdev = rdev;
 329                 break;
 330         case S_IFREG:
 331         case S_IFDIR:
 332                 if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
 333                         uint    di_flags = 0;
 334
 335                         if ((mode & S_IFMT) == S_IFDIR) {
 336                                 if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
 337                                         di_flags |= XFS_DIFLAG_RTINHERIT;
 338                                 if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
 339                                         di_flags |= XFS_DIFLAG_EXTSZINHERIT;
 340                                         ip->i_d.di_extsize = pip->i_d.di_extsize;
 341                                 }
 342                         } else {
 343                                 if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) {
 344                                         di_flags |= XFS_DIFLAG_REALTIME;
 345                                 }
 346                                 if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
 347                                         di_flags |= XFS_DIFLAG_EXTSIZE;
 348                                         ip->i_d.di_extsize = pip->i_d.di_extsize;
 349                                 }
 350                         }
 351                         if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
 352                                 di_flags |= XFS_DIFLAG_PROJINHERIT;
 353                         ip->i_d.di_flags |= di_flags;
 354                 }
 355                 /* FALLTHROUGH */
 356         case S_IFLNK:
 357                 ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
 358                 ip->i_df.if_flags = XFS_IFEXTENTS;
 359                 ip->i_df.if_bytes = 0;
 360                 ip->i_df.if_u1.if_root = NULL;
 361                 break;
 362         default:
 363                 ASSERT(0);
 364         }
 365         /* Attribute fork settings for new inode. */
 366         ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
 367         ip->i_d.di_anextents = 0;
 368
 369         /*
 370          * set up the inode ops structure that the libxfs code relies on
 371          */
 372         if (XFS_ISDIR(ip))
 373                 ip->d_ops = ip->i_mount->m_dir_inode_ops;
 374         else
 375                 ip->d_ops = ip->i_mount->m_nondir_inode_ops;
 376
 377         /*
 378          * Log the new values stuffed into the inode.
 379          */
 380         xfs_trans_ijoin(tp, ip, 0);
 381         xfs_trans_log_inode(tp, ip, flags);
 382         *ipp = ip;
 383         return 0;
 384 }
 385
 386 /*
 387  * Writes a modified inode's changes out to the inode's on disk home.
 388  * Originally based on xfs_iflush_int() from xfs_inode.c in the kernel.
 389  */
 390 int
 391 libxfs_iflush_int(xfs_inode_t *ip, xfs_buf_t *bp)
 392 {
 393         xfs_inode_log_item_t    *iip;
 394         xfs_dinode_t            *dip;
 395         xfs_mount_t             *mp;
 396
 397         ASSERT(bp-b_log_item != NULL);
 398         ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
 399                 ip->i_d.di_nextents > ip->i_df.if_ext_max);
 400         ASSERT(ip->i_d.di_version > 1);
 401
 402         iip = ip->i_itemp;
 403         mp = ip->i_mount;
 404
 405         /* set *dip = inode's place in the buffer */
 406         dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);
 407
 408         ASSERT(ip->i_d.di_magic == XFS_DINODE_MAGIC);
 409         if (XFS_ISREG(ip)) {
 410                 ASSERT( (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS) ||
 411                         (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) );
 412         } else if (XFS_ISDIR(ip)) {
 413                 ASSERT( (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS) ||
 414                         (ip->i_d.di_format == XFS_DINODE_FMT_BTREE)   ||
 415                         (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) );
 416         }
 417         ASSERT(ip->i_d.di_nextents+ip->i_d.di_anextents <= ip->i_d.di_nblocks);
 418         ASSERT(ip->i_d.di_forkoff <= mp->m_sb.sb_inodesize);
 419
 420         /* bump the change count on v3 inodes */
 421         if (ip->i_d.di_version == 3)
 422                 VFS_I(ip)->i_version++;
 423
 424         /* Check the inline fork data before we write out. */
 425         if (!libxfs_inode_verify_forks(ip, &xfs_default_ifork_ops))
 426                 return -EFSCORRUPTED;
 427
 428         /*
 429          * Copy the dirty parts of the inode into the on-disk
 430          * inode.  We always copy out the core of the inode,
 431          * because if the inode is dirty at all the core must
 432          * be.
 433          */
 434         xfs_inode_to_disk(ip, dip, iip->ili_item.li_lsn);
 435
 436         xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
 437         if (XFS_IFORK_Q(ip))
 438                 xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
 439
 440         /* generate the checksum. */
 441         xfs_dinode_calc_crc(mp, dip);
 442
 443         return 0;
 444 }
 445
 446 int
 447 libxfs_mod_incore_sb(
 448         struct xfs_mount *mp,
 449         int             field,
 450         int64_t         delta,
 451         int             rsvd)
 452 {
 453         long long       lcounter;       /* long counter for 64 bit fields */
 454
 455         switch (field) {
 456         case XFS_TRANS_SB_FDBLOCKS:
 457                 lcounter = (long long)mp->m_sb.sb_fdblocks;
 458                 lcounter += delta;
 459                 if (lcounter < 0)
 460                         return -ENOSPC;
 461                 mp->m_sb.sb_fdblocks = lcounter;
 462                 return 0;
 463         default:
 464                 ASSERT(0);
 465                 return -EINVAL;
 466         }
 467 }
 468
 469 /*
 470  * This routine allocates disk space for the given file.
 471  * Originally derived from xfs_alloc_file_space().
 472  */
 473 int
 474 libxfs_alloc_file_space(
 475         xfs_inode_t     *ip,
 476         xfs_off_t       offset,
 477         xfs_off_t       len,
 478         int             alloc_type,
 479         int             attr_flags)
 480 {
 481         xfs_mount_t     *mp;
 482         xfs_off_t       count;
 483         xfs_filblks_t   datablocks;
 484         xfs_filblks_t   allocated_fsb;
 485         xfs_filblks_t   allocatesize_fsb;
 486         xfs_bmbt_irec_t *imapp;
 487         xfs_bmbt_irec_t imaps[1];
 488         int             reccount;
 489         uint            resblks;
 490         xfs_fileoff_t   startoffset_fsb;
 491         xfs_trans_t     *tp;
 492         int             xfs_bmapi_flags;
 493         int             error;
 494
 495         if (len <= 0)
 496                 return -EINVAL;
 497
 498         count = len;
 499         error = 0;
 500         imapp = &imaps[0];
 501         reccount = 1;
 502         xfs_bmapi_flags = alloc_type ? XFS_BMAPI_PREALLOC : 0;
 503         mp = ip->i_mount;
 504         startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
 505         allocatesize_fsb = XFS_B_TO_FSB(mp, count);
 506
 507         /* allocate file space until done or until there is an error */
 508         while (allocatesize_fsb && !error) {
 509                 datablocks = allocatesize_fsb;
 510
 511                 resblks = (uint)XFS_DIOSTRAT_SPACE_RES(mp, datablocks);
 512                 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
 513                                         0, 0, &tp);
 514                 /*
 515                  * Check for running out of space
 516                  */
 517                 if (error) {
 518                         ASSERT(error == -ENOSPC);
 519                         break;
 520                 }
 521                 xfs_trans_ijoin(tp, ip, 0);
 522
 523                 error = xfs_bmapi_write(tp, ip, startoffset_fsb, allocatesize_fsb,
 524                                 xfs_bmapi_flags, 0, imapp, &reccount);
 525
 526                 if (error)
 527                         goto error0;
 528
 529                 /*
 530                  * Complete the transaction
 531                  */
 532                 error = xfs_trans_commit(tp);
 533                 if (error)
 534                         break;
 535
 536                 allocated_fsb = imapp->br_blockcount;
 537                 if (reccount == 0)
 538                         return -ENOSPC;
 539
 540                 startoffset_fsb += allocated_fsb;
 541                 allocatesize_fsb -= allocated_fsb;
 542         }
 543         return error;
 544
 545 error0: /* Cancel bmap, cancel trans */
 546         xfs_trans_cancel(tp);
 547         return error;
 548 }
 549
 550 /*
 551  * Wrapper around call to libxfs_ialloc. Takes care of committing and
 552  * allocating a new transaction as needed.
 553  *
 554  * Originally there were two copies of this code - one in mkfs, the
 555  * other in repair - now there is just the one.
 556  */
 557 int
 558 libxfs_inode_alloc(
 559         xfs_trans_t     **tp,
 560         xfs_inode_t     *pip,
 561         mode_t          mode,
 562         nlink_t         nlink,
 563         xfs_dev_t       rdev,
 564         struct cred     *cr,
 565         struct fsxattr  *fsx,
 566         xfs_inode_t     **ipp)
 567 {
 568         xfs_buf_t       *ialloc_context;
 569         xfs_inode_t     *ip;
 570         int             error;
 571
 572         ialloc_context = (xfs_buf_t *)0;
 573         error = libxfs_ialloc(*tp, pip, mode, nlink, rdev, cr, fsx,
 574                            &ialloc_context, &ip);
 575         if (error) {
 576                 *ipp = NULL;
 577                 return error;
 578         }
 579         if (!ialloc_context && !ip) {
 580                 *ipp = NULL;
 581                 return -ENOSPC;
 582         }
 583
 584         if (ialloc_context) {
 585
 586                 xfs_trans_bhold(*tp, ialloc_context);
 587
 588                 error = xfs_trans_roll(tp);
 589                 if (error) {
 590                         fprintf(stderr, _("%s: cannot duplicate transaction: %s\n"),
 591                                 progname, strerror(error));
 592                         exit(1);
 593                 }
 594                 xfs_trans_bjoin(*tp, ialloc_context);
 595                 error = libxfs_ialloc(*tp, pip, mode, nlink, rdev, cr,
 596                                    fsx, &ialloc_context, &ip);
 597                 if (!ip)
 598                         error = -ENOSPC;
 599                 if (error)
 600                         return error;
 601         }
 602
 603         *ipp = ip;
 604         return error;
 605 }
 606
 607 /*
 608  * Userspace versions of common diagnostic routines (varargs fun).
 609  */
 610 void
 611 libxfs_fs_repair_cmn_err(int level, xfs_mount_t *mp, char *fmt, ...)
 612 {
 613         va_list ap;
 614
 615         va_start(ap, fmt);
 616         vfprintf(stderr, fmt, ap);
 617         fprintf(stderr, "  This is a bug.\n");
 618         fprintf(stderr, "%s version %s\n", progname, VERSION);
 619         fprintf(stderr,
 620                 "Please capture the filesystem metadata with xfs_metadump and\n"
 621                 "report it to linux-xfs@vger.kernel.org\n");
 622         va_end(ap);
 623 }
 624
 625 void
 626 libxfs_fs_cmn_err(int level, xfs_mount_t *mp, char *fmt, ...)
 627 {
 628         va_list ap;
 629
 630         va_start(ap, fmt);
 631         vfprintf(stderr, fmt, ap);
 632         fputs("\n", stderr);
 633         va_end(ap);
 634 }
 635
 636 void
 637 cmn_err(int level, char *fmt, ...)
 638 {
 639         va_list ap;
 640
 641         va_start(ap, fmt);
 642         vfprintf(stderr, fmt, ap);
 643         fputs("\n", stderr);
 644         va_end(ap);
 645 }
 646
 647 /*
 648  * Warnings specifically for verifier errors.  Differentiate CRC vs. invalid
 649  * values, and omit the stack trace unless the error level is tuned high.
 650  */
 651 void
 652 xfs_verifier_error(
 653         struct xfs_buf          *bp,
 654         int                     error,
 655         xfs_failaddr_t          failaddr)
 656 {
 657         xfs_buf_ioerror(bp, error);
 658
 659         xfs_alert(NULL, "Metadata %s detected at %p, %s block 0x%llx/0x%x",
 660                   bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
 661                   failaddr ? failaddr : __return_address,
 662                   bp->b_ops->name, bp->b_bn, BBTOB(bp->b_length));
 663 }
 664
 665 /*
 666  * Warnings for inode corruption problems.  Don't bother with the stack
 667  * trace unless the error level is turned up high.
 668  */
 669 void
 670 xfs_inode_verifier_error(
 671         struct xfs_inode        *ip,
 672         int                     error,
 673         const char              *name,
 674         void                    *buf,
 675         size_t                  bufsz,
 676         xfs_failaddr_t          failaddr)
 677 {
 678         xfs_alert(NULL, "Metadata %s detected at %p, inode 0x%llx %s",
 679                   error == -EFSBADCRC ? "CRC error" : "corruption",
 680                   failaddr ? failaddr : __return_address,
 681                   ip->i_ino, name);
 682 }
 683
 684 /*
 685  * This is called from I/O verifiers on v5 superblock filesystems. In the
 686  * kernel, it validates the metadata LSN parameter against the current LSN of
 687  * the active log. We don't have an active log in userspace so this kind of
 688  * validation is not required. Therefore, this function always returns true in
 689  * userspace.
 690  *
 691  * xfs_repair piggybacks off this mechanism to help track the largest metadata
 692  * LSN in use on a filesystem. Keep a record of the largest LSN seen such that
 693  * repair can validate it against the state of the log.
 694  */
 695 xfs_lsn_t       libxfs_max_lsn = 0;
 696 static pthread_mutex_t  libxfs_max_lsn_lock = PTHREAD_MUTEX_INITIALIZER;
 697
 698 bool
 699 xfs_log_check_lsn(
 700         struct xfs_mount        *mp,
 701         xfs_lsn_t               lsn)
 702 {
 703         int                     cycle = CYCLE_LSN(lsn);
 704         int                     block = BLOCK_LSN(lsn);
 705         int                     max_cycle;
 706         int                     max_block;
 707
 708         if (lsn == NULLCOMMITLSN)
 709                 return true;
 710
 711         pthread_mutex_lock(&libxfs_max_lsn_lock);
 712
 713         max_cycle = CYCLE_LSN(libxfs_max_lsn);
 714         max_block = BLOCK_LSN(libxfs_max_lsn);
 715
 716         if ((cycle > max_cycle) ||
 717             (cycle == max_cycle && block > max_block))
 718                 libxfs_max_lsn = lsn;
 719
 720         pthread_mutex_unlock(&libxfs_max_lsn_lock);
 721
 722         return true;
 723 }
 724
 725 static struct xfs_buftarg *
 726 xfs_find_bdev_for_inode(
 727         struct xfs_inode        *ip)
 728 {
 729         struct xfs_mount        *mp = ip->i_mount;
 730
 731         if (XFS_IS_REALTIME_INODE(ip))
 732                 return mp->m_rtdev_targp;
 733         return mp->m_ddev_targp;
 734 }
 735
 736 static xfs_daddr_t
 737 xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
 738 {
 739         if (XFS_IS_REALTIME_INODE(ip))
 740                  return XFS_FSB_TO_BB(ip->i_mount, fsb);
 741         return XFS_FSB_TO_DADDR(ip->i_mount, (fsb));
 742 }
 743
 744 int
 745 libxfs_zero_extent(
 746         struct xfs_inode *ip,
 747         xfs_fsblock_t   start_fsb,
 748         xfs_off_t       count_fsb)
 749 {
 750         xfs_daddr_t     sector = xfs_fsb_to_db(ip, start_fsb);
 751         ssize_t         size = XFS_FSB_TO_BB(ip->i_mount, count_fsb);
 752
 753         return libxfs_device_zero(xfs_find_bdev_for_inode(ip), sector, size);
 754 }
 755
 756 unsigned int
 757 hweight8(unsigned int w)
 758 {
 759         unsigned int res = w - ((w >> 1) & 0x55);
 760         res = (res & 0x33) + ((res >> 2) & 0x33);
 761         return (res + (res >> 4)) & 0x0F;
 762 }
 763
 764 unsigned int
 765 hweight32(unsigned int w)
 766 {
 767         unsigned int res = w - ((w >> 1) & 0x55555555);
 768         res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
 769         res = (res + (res >> 4)) & 0x0F0F0F0F;
 770         res = res + (res >> 8);
 771         return (res + (res >> 16)) & 0x000000FF;
 772 }
 773
 774 unsigned int
 775 hweight64(__u64 w)
 776 {
 777         return hweight32((unsigned int)w) +
 778                hweight32((unsigned int)(w >> 32));
 779 }
 780