libxfs/util.c

   1 /*
   2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
   3  * All Rights Reserved.
   4  *
   5  * This program is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU General Public License as
   7  * published by the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope that it would be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program; if not, write the Free Software Foundation,
  16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17  */
  18
  19 #include "libxfs_priv.h"
  20 #include "libxfs_io.h"
  21 #include "init.h"
  22 #include "xfs_fs.h"
  23 #include "xfs_shared.h"
  24 #include "xfs_format.h"
  25 #include "xfs_log_format.h"
  26 #include "xfs_trans_resv.h"
  27 #include "xfs_mount.h"
  28 #include "xfs_defer.h"
  29 #include "xfs_inode_buf.h"
  30 #include "xfs_inode_fork.h"
  31 #include "xfs_inode.h"
  32 #include "xfs_trans.h"
  33 #include "xfs_bmap.h"
  34 #include "xfs_bmap_btree.h"
  35 #include "xfs_trans_space.h"
  36 #include "xfs_ialloc.h"
  37 #include "xfs_alloc.h"
  38 #include "xfs_bit.h"
  39 #include "xfs_da_format.h"
  40 #include "xfs_da_btree.h"
  41 #include "xfs_dir2_priv.h"
  42
  43 /*
  44  * Calculate the worst case log unit reservation for a given superblock
  45  * configuration. Copied and munged from the kernel code, and assumes a
  46  * worse case header usage (maximum log buffer sizes)
  47  */
  48 int
  49 xfs_log_calc_unit_res(
  50         struct xfs_mount        *mp,
  51         int                     unit_bytes)
  52 {
  53         int                     iclog_space;
  54         int                     iclog_header_size;
  55         int                     iclog_size;
  56         uint                    num_headers;
  57
  58         if (xfs_sb_version_haslogv2(&mp->m_sb)) {
  59                 iclog_size = XLOG_MAX_RECORD_BSIZE;
  60                 iclog_header_size = BBTOB(iclog_size / XLOG_HEADER_CYCLE_SIZE);
  61         } else {
  62                 iclog_size = XLOG_BIG_RECORD_BSIZE;
  63                 iclog_header_size = BBSIZE;
  64         }
  65
  66         /*
  67          * Permanent reservations have up to 'cnt'-1 active log operations
  68          * in the log.  A unit in this case is the amount of space for one
  69          * of these log operations.  Normal reservations have a cnt of 1
  70          * and their unit amount is the total amount of space required.
  71          *
  72          * The following lines of code account for non-transaction data
  73          * which occupy space in the on-disk log.
  74          *
  75          * Normal form of a transaction is:
  76          * <oph><trans-hdr><start-oph><reg1-oph><reg1><reg2-oph>...<commit-oph>
  77          * and then there are LR hdrs, split-recs and roundoff at end of syncs.
  78          *
  79          * We need to account for all the leadup data and trailer data
  80          * around the transaction data.
  81          * And then we need to account for the worst case in terms of using
  82          * more space.
  83          * The worst case will happen if:
  84          * - the placement of the transaction happens to be such that the
  85          *   roundoff is at its maximum
  86          * - the transaction data is synced before the commit record is synced
  87          *   i.e. <transaction-data><roundoff> | <commit-rec><roundoff>
  88          *   Therefore the commit record is in its own Log Record.
  89          *   This can happen as the commit record is called with its
  90          *   own region to xlog_write().
  91          *   This then means that in the worst case, roundoff can happen for
  92          *   the commit-rec as well.
  93          *   The commit-rec is smaller than padding in this scenario and so it is
  94          *   not added separately.
  95          */
  96
  97         /* for trans header */
  98         unit_bytes += sizeof(xlog_op_header_t);
  99         unit_bytes += sizeof(xfs_trans_header_t);
 100
 101         /* for start-rec */
 102         unit_bytes += sizeof(xlog_op_header_t);
 103
 104         /*
 105          * for LR headers - the space for data in an iclog is the size minus
 106          * the space used for the headers. If we use the iclog size, then we
 107          * undercalculate the number of headers required.
 108          *
 109          * Furthermore - the addition of op headers for split-recs might
 110          * increase the space required enough to require more log and op
 111          * headers, so take that into account too.
 112          *
 113          * IMPORTANT: This reservation makes the assumption that if this
 114          * transaction is the first in an iclog and hence has the LR headers
 115          * accounted to it, then the remaining space in the iclog is
 116          * exclusively for this transaction.  i.e. if the transaction is larger
 117          * than the iclog, it will be the only thing in that iclog.
 118          * Fundamentally, this means we must pass the entire log vector to
 119          * xlog_write to guarantee this.
 120          */
 121         iclog_space = iclog_size - iclog_header_size;
 122         num_headers = howmany(unit_bytes, iclog_space);
 123
 124         /* for split-recs - ophdrs added when data split over LRs */
 125         unit_bytes += sizeof(xlog_op_header_t) * num_headers;
 126
 127         /* add extra header reservations if we overrun */
 128         while (!num_headers ||
 129                howmany(unit_bytes, iclog_space) > num_headers) {
 130                 unit_bytes += sizeof(xlog_op_header_t);
 131                 num_headers++;
 132         }
 133         unit_bytes += iclog_header_size * num_headers;
 134
 135         /* for commit-rec LR header - note: padding will subsume the ophdr */
 136         unit_bytes += iclog_header_size;
 137
 138         /* for roundoff padding for transaction data and one for commit record */
 139         if (xfs_sb_version_haslogv2(&mp->m_sb) && mp->m_sb.sb_logsunit > 1) {
 140                 /* log su roundoff */
 141                 unit_bytes += 2 * mp->m_sb.sb_logsunit;
 142         } else {
 143                 /* BB roundoff */
 144                 unit_bytes += 2 * BBSIZE;
 145         }
 146
 147         return unit_bytes;
 148 }
 149
 150 /*
 151  * Change the requested timestamp in the given inode.
 152  *
 153  * This was once shared with the kernel, but has diverged to the point
 154  * where it's no longer worth the hassle of maintaining common code.
 155  */
 156 void
 157 libxfs_trans_ichgtime(
 158         struct xfs_trans        *tp,
 159         struct xfs_inode        *ip,
 160         int                     flags)
 161 {
 162         struct timespec tv;
 163         struct timeval  stv;
 164
 165         gettimeofday(&stv, (struct timezone *)0);
 166         tv.tv_sec = stv.tv_sec;
 167         tv.tv_nsec = stv.tv_usec * 1000;
 168         if (flags & XFS_ICHGTIME_MOD)
 169                 VFS_I(ip)->i_mtime = tv;
 170         if (flags & XFS_ICHGTIME_CHG)
 171                 VFS_I(ip)->i_ctime = tv;
 172         if (flags & XFS_ICHGTIME_CREATE) {
 173                 ip->i_d.di_crtime.t_sec = (int32_t)tv.tv_sec;
 174                 ip->i_d.di_crtime.t_nsec = (int32_t)tv.tv_nsec;
 175         }
 176 }
 177
 178 STATIC uint16_t
 179 xfs_flags2diflags(
 180         struct xfs_inode        *ip,
 181         unsigned int            xflags)
 182 {
 183         /* can't set PREALLOC this way, just preserve it */
 184         uint16_t                di_flags =
 185                 (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
 186
 187         if (xflags & FS_XFLAG_IMMUTABLE)
 188                 di_flags |= XFS_DIFLAG_IMMUTABLE;
 189         if (xflags & FS_XFLAG_APPEND)
 190                 di_flags |= XFS_DIFLAG_APPEND;
 191         if (xflags & FS_XFLAG_SYNC)
 192                 di_flags |= XFS_DIFLAG_SYNC;
 193         if (xflags & FS_XFLAG_NOATIME)
 194                 di_flags |= XFS_DIFLAG_NOATIME;
 195         if (xflags & FS_XFLAG_NODUMP)
 196                 di_flags |= XFS_DIFLAG_NODUMP;
 197         if (xflags & FS_XFLAG_NODEFRAG)
 198                 di_flags |= XFS_DIFLAG_NODEFRAG;
 199         if (xflags & FS_XFLAG_FILESTREAM)
 200                 di_flags |= XFS_DIFLAG_FILESTREAM;
 201         if (S_ISDIR(VFS_I(ip)->i_mode)) {
 202                 if (xflags & FS_XFLAG_RTINHERIT)
 203                         di_flags |= XFS_DIFLAG_RTINHERIT;
 204                 if (xflags & FS_XFLAG_NOSYMLINKS)
 205                         di_flags |= XFS_DIFLAG_NOSYMLINKS;
 206                 if (xflags & FS_XFLAG_EXTSZINHERIT)
 207                         di_flags |= XFS_DIFLAG_EXTSZINHERIT;
 208                 if (xflags & FS_XFLAG_PROJINHERIT)
 209                         di_flags |= XFS_DIFLAG_PROJINHERIT;
 210         } else if (S_ISREG(VFS_I(ip)->i_mode)) {
 211                 if (xflags & FS_XFLAG_REALTIME)
 212                         di_flags |= XFS_DIFLAG_REALTIME;
 213                 if (xflags & FS_XFLAG_EXTSIZE)
 214                         di_flags |= XFS_DIFLAG_EXTSIZE;
 215         }
 216
 217         return di_flags;
 218 }
 219
 220 STATIC uint64_t
 221 xfs_flags2diflags2(
 222         struct xfs_inode        *ip,
 223         unsigned int            xflags)
 224 {
 225         uint64_t                di_flags2 =
 226                 (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK);
 227
 228         if (xflags & FS_XFLAG_DAX)
 229                 di_flags2 |= XFS_DIFLAG2_DAX;
 230         if (xflags & FS_XFLAG_COWEXTSIZE)
 231                 di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
 232
 233         return di_flags2;
 234 }
 235
 236 /*
 237  * Allocate an inode on disk and return a copy of its in-core version.
 238  * Set mode, nlink, and rdev appropriately within the inode.
 239  * The uid and gid for the inode are set according to the contents of
 240  * the given cred structure.
 241  *
 242  * This was once shared with the kernel, but has diverged to the point
 243  * where it's no longer worth the hassle of maintaining common code.
 244  */
 245 int
 246 libxfs_ialloc(
 247         xfs_trans_t     *tp,
 248         xfs_inode_t     *pip,
 249         mode_t          mode,
 250         nlink_t         nlink,
 251         xfs_dev_t       rdev,
 252         struct cred     *cr,
 253         struct fsxattr  *fsx,
 254         xfs_buf_t       **ialloc_context,
 255         xfs_inode_t     **ipp)
 256 {
 257         xfs_ino_t       ino;
 258         xfs_inode_t     *ip;
 259         uint            flags;
 260         int             error;
 261
 262         /*
 263          * Call the space management code to pick
 264          * the on-disk inode to be allocated.
 265          */
 266         error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode,
 267                             ialloc_context, &ino);
 268         if (error != 0)
 269                 return error;
 270         if (*ialloc_context || ino == NULLFSINO) {
 271                 *ipp = NULL;
 272                 return 0;
 273         }
 274         ASSERT(*ialloc_context == NULL);
 275
 276         error = xfs_trans_iget(tp->t_mountp, tp, ino, 0, 0, &ip);
 277         if (error != 0)
 278                 return error;
 279         ASSERT(ip != NULL);
 280
 281         VFS_I(ip)->i_mode = mode;
 282         set_nlink(VFS_I(ip), nlink);
 283         ip->i_d.di_uid = cr->cr_uid;
 284         ip->i_d.di_gid = cr->cr_gid;
 285         xfs_set_projid(&ip->i_d, pip ? 0 : fsx->fsx_projid);
 286         xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD);
 287
 288         /*
 289          * We only support filesystems that understand v2 format inodes. So if
 290          * this is currently an old format inode, then change the inode version
 291          * number now.  This way we only do the conversion here rather than here
 292          * and in the flush/logging code.
 293          */
 294         if (ip->i_d.di_version == 1) {
 295                 ip->i_d.di_version = 2;
 296                 /*
 297                  * old link count, projid_lo/hi field, pad field
 298                  * already zeroed
 299                  */
 300         }
 301
 302         if (pip && (VFS_I(pip)->i_mode & S_ISGID)) {
 303                 ip->i_d.di_gid = pip->i_d.di_gid;
 304                 if ((VFS_I(pip)->i_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR)
 305                         VFS_I(ip)->i_mode |= S_ISGID;
 306         }
 307
 308         ip->i_d.di_size = 0;
 309         ip->i_d.di_nextents = 0;
 310         ASSERT(ip->i_d.di_nblocks == 0);
 311         ip->i_d.di_extsize = pip ? 0 : fsx->fsx_extsize;
 312         ip->i_d.di_dmevmask = 0;
 313         ip->i_d.di_dmstate = 0;
 314         ip->i_d.di_flags = pip ? 0 : xfs_flags2diflags(ip, fsx->fsx_xflags);
 315
 316         if (ip->i_d.di_version == 3) {
 317                 ASSERT(ip->i_d.di_ino == ino);
 318                 ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_meta_uuid));
 319                 VFS_I(ip)->i_version = 1;
 320                 ip->i_d.di_flags2 = pip ? 0 : xfs_flags2diflags2(ip,
 321                                 fsx->fsx_xflags);
 322                 ip->i_d.di_crtime.t_sec = (int32_t)VFS_I(ip)->i_mtime.tv_sec;
 323                 ip->i_d.di_crtime.t_nsec = (int32_t)VFS_I(ip)->i_mtime.tv_nsec;
 324                 ip->i_d.di_cowextsize = pip ? 0 : fsx->fsx_cowextsize;
 325         }
 326
 327         flags = XFS_ILOG_CORE;
 328         switch (mode & S_IFMT) {
 329         case S_IFIFO:
 330         case S_IFSOCK:
 331                 /* doesn't make sense to set an rdev for these */
 332                 rdev = 0;
 333                 /* FALLTHROUGH */
 334         case S_IFCHR:
 335         case S_IFBLK:
 336                 ip->i_d.di_format = XFS_DINODE_FMT_DEV;
 337                 flags |= XFS_ILOG_DEV;
 338                 VFS_I(ip)->i_rdev = rdev;
 339                 break;
 340         case S_IFREG:
 341         case S_IFDIR:
 342                 if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
 343                         uint    di_flags = 0;
 344
 345                         if ((mode & S_IFMT) == S_IFDIR) {
 346                                 if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
 347                                         di_flags |= XFS_DIFLAG_RTINHERIT;
 348                                 if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
 349                                         di_flags |= XFS_DIFLAG_EXTSZINHERIT;
 350                                         ip->i_d.di_extsize = pip->i_d.di_extsize;
 351                                 }
 352                         } else {
 353                                 if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) {
 354                                         di_flags |= XFS_DIFLAG_REALTIME;
 355                                 }
 356                                 if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
 357                                         di_flags |= XFS_DIFLAG_EXTSIZE;
 358                                         ip->i_d.di_extsize = pip->i_d.di_extsize;
 359                                 }
 360                         }
 361                         if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
 362                                 di_flags |= XFS_DIFLAG_PROJINHERIT;
 363                         ip->i_d.di_flags |= di_flags;
 364                 }
 365                 /* FALLTHROUGH */
 366         case S_IFLNK:
 367                 ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
 368                 ip->i_df.if_flags = XFS_IFEXTENTS;
 369                 ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
 370                 ip->i_df.if_u1.if_root = NULL;
 371                 break;
 372         default:
 373                 ASSERT(0);
 374         }
 375         /* Attribute fork settings for new inode. */
 376         ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
 377         ip->i_d.di_anextents = 0;
 378
 379         /*
 380          * set up the inode ops structure that the libxfs code relies on
 381          */
 382         if (XFS_ISDIR(ip))
 383                 ip->d_ops = ip->i_mount->m_dir_inode_ops;
 384         else
 385                 ip->d_ops = ip->i_mount->m_nondir_inode_ops;
 386
 387         /*
 388          * Log the new values stuffed into the inode.
 389          */
 390         xfs_trans_log_inode(tp, ip, flags);
 391         *ipp = ip;
 392         return 0;
 393 }
 394
 395 void
 396 libxfs_iprint(
 397         xfs_inode_t             *ip)
 398 {
 399         struct xfs_icdinode     *dip;
 400         xfs_extnum_t            i = 0;
 401         xfs_ifork_t             *ifp;           /* inode fork pointer */
 402         struct xfs_iext_cursor  icur;
 403         xfs_bmbt_irec_t         rec;
 404
 405         printf("Inode %lx\n", (unsigned long)ip);
 406         printf("    i_ino %llx\n", (unsigned long long)ip->i_ino);
 407
 408         if (ip->i_df.if_flags & XFS_IFEXTENTS)
 409                 printf("EXTENTS ");
 410         printf("\n");
 411         printf("    i_df.if_bytes %d\n", ip->i_df.if_bytes);
 412         printf("    i_df.if_u1.if_root/if_data %lx\n",
 413                 (unsigned long)ip->i_df.if_u1.if_root);
 414         if (ip->i_df.if_flags & XFS_IFEXTENTS) {
 415                 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
 416                 for_each_xfs_iext(ifp, &icur, &rec) {
 417                         printf("\t%d: startoff %llu, startblock 0x%llx,"
 418                                 " blockcount %llu, state %d\n",
 419                                 i, (unsigned long long)rec.br_startoff,
 420                                 (unsigned long long)rec.br_startblock,
 421                                 (unsigned long long)rec.br_blockcount,
 422                                 (int)rec.br_state);
 423                         i++;
 424                 }
 425         }
 426         printf("    i_df.if_broot %lx\n", (unsigned long)ip->i_df.if_broot);
 427         printf("    i_df.if_broot_bytes %x\n", ip->i_df.if_broot_bytes);
 428
 429         dip = &ip->i_d;
 430         printf("\nOn disk portion\n");
 431         printf("    di_mode %o\n", VFS_I(ip)->i_mode);
 432         printf("    di_version %x\n", (uint)dip->di_version);
 433         switch (ip->i_d.di_format) {
 434         case XFS_DINODE_FMT_LOCAL:
 435                 printf("    Inline inode\n");
 436                 break;
 437         case XFS_DINODE_FMT_EXTENTS:
 438                 printf("    Extents inode\n");
 439                 break;
 440         case XFS_DINODE_FMT_BTREE:
 441                 printf("    B-tree inode\n");
 442                 break;
 443         default:
 444                 printf("    Other inode\n");
 445                 break;
 446         }
 447         printf("   di_nlink %x\n", VFS_I(ip)->i_nlink);
 448         printf("   di_uid %d\n", dip->di_uid);
 449         printf("   di_gid %d\n", dip->di_gid);
 450         printf("   di_nextents %d\n", dip->di_nextents);
 451         printf("   di_size %llu\n", (unsigned long long)dip->di_size);
 452         printf("   di_gen %x\n", VFS_I(ip)->i_generation);
 453         printf("   di_extsize %d\n", dip->di_extsize);
 454         printf("   di_flags %x\n", dip->di_flags);
 455         printf("   di_nblocks %llu\n", (unsigned long long)dip->di_nblocks);
 456 }
 457
 458 /*
 459  * Writes a modified inode's changes out to the inode's on disk home.
 460  * Originally based on xfs_iflush_int() from xfs_inode.c in the kernel.
 461  */
 462 int
 463 libxfs_iflush_int(xfs_inode_t *ip, xfs_buf_t *bp)
 464 {
 465         xfs_inode_log_item_t    *iip;
 466         xfs_dinode_t            *dip;
 467         xfs_mount_t             *mp;
 468
 469         ASSERT(bp-b_fspriv != NULL);
 470         ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
 471                 ip->i_d.di_nextents > ip->i_df.if_ext_max);
 472         ASSERT(ip->i_d.di_version > 1);
 473
 474         iip = ip->i_itemp;
 475         mp = ip->i_mount;
 476
 477         /* set *dip = inode's place in the buffer */
 478         dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);
 479
 480         ASSERT(ip->i_d.di_magic == XFS_DINODE_MAGIC);
 481         if (XFS_ISREG(ip)) {
 482                 ASSERT( (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS) ||
 483                         (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) );
 484         } else if (XFS_ISDIR(ip)) {
 485                 ASSERT( (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS) ||
 486                         (ip->i_d.di_format == XFS_DINODE_FMT_BTREE)   ||
 487                         (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) );
 488         }
 489         ASSERT(ip->i_d.di_nextents+ip->i_d.di_anextents <= ip->i_d.di_nblocks);
 490         ASSERT(ip->i_d.di_forkoff <= mp->m_sb.sb_inodesize);
 491
 492         /* bump the change count on v3 inodes */
 493         if (ip->i_d.di_version == 3)
 494                 VFS_I(ip)->i_version++;
 495
 496         /* Check the inline fork data before we write out. */
 497         if (!libxfs_inode_verify_forks(ip))
 498                 return -EFSCORRUPTED;
 499
 500         /*
 501          * Copy the dirty parts of the inode into the on-disk
 502          * inode.  We always copy out the core of the inode,
 503          * because if the inode is dirty at all the core must
 504          * be.
 505          */
 506         xfs_inode_to_disk(ip, dip, iip->ili_item.li_lsn);
 507
 508         xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
 509         if (XFS_IFORK_Q(ip))
 510                 xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
 511
 512         /* generate the checksum. */
 513         xfs_dinode_calc_crc(mp, dip);
 514
 515         return 0;
 516 }
 517
 518 int
 519 libxfs_mod_incore_sb(
 520         struct xfs_mount *mp,
 521         int             field,
 522         int64_t         delta,
 523         int             rsvd)
 524 {
 525         long long       lcounter;       /* long counter for 64 bit fields */
 526
 527         switch (field) {
 528         case XFS_TRANS_SB_FDBLOCKS:
 529                 lcounter = (long long)mp->m_sb.sb_fdblocks;
 530                 lcounter += delta;
 531                 if (lcounter < 0)
 532                         return -ENOSPC;
 533                 mp->m_sb.sb_fdblocks = lcounter;
 534                 return 0;
 535         default:
 536                 ASSERT(0);
 537                 return -EINVAL;
 538         }
 539 }
 540
 541 /*
 542  * This routine allocates disk space for the given file.
 543  * Originally derived from xfs_alloc_file_space().
 544  */
 545 int
 546 libxfs_alloc_file_space(
 547         xfs_inode_t     *ip,
 548         xfs_off_t       offset,
 549         xfs_off_t       len,
 550         int             alloc_type,
 551         int             attr_flags)
 552 {
 553         xfs_mount_t     *mp;
 554         xfs_off_t       count;
 555         xfs_filblks_t   datablocks;
 556         xfs_filblks_t   allocated_fsb;
 557         xfs_filblks_t   allocatesize_fsb;
 558         xfs_fsblock_t   firstfsb;
 559         struct xfs_defer_ops free_list;
 560         xfs_bmbt_irec_t *imapp;
 561         xfs_bmbt_irec_t imaps[1];
 562         int             reccount;
 563         uint            resblks;
 564         xfs_fileoff_t   startoffset_fsb;
 565         xfs_trans_t     *tp;
 566         int             xfs_bmapi_flags;
 567         int             error;
 568
 569         if (len <= 0)
 570                 return -EINVAL;
 571
 572         count = len;
 573         error = 0;
 574         imapp = &imaps[0];
 575         reccount = 1;
 576         xfs_bmapi_flags = alloc_type ? XFS_BMAPI_PREALLOC : 0;
 577         mp = ip->i_mount;
 578         startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
 579         allocatesize_fsb = XFS_B_TO_FSB(mp, count);
 580
 581         /* allocate file space until done or until there is an error */
 582         while (allocatesize_fsb && !error) {
 583                 datablocks = allocatesize_fsb;
 584
 585                 resblks = (uint)XFS_DIOSTRAT_SPACE_RES(mp, datablocks);
 586                 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
 587                                         0, 0, &tp);
 588                 /*
 589                  * Check for running out of space
 590                  */
 591                 if (error) {
 592                         ASSERT(error == -ENOSPC);
 593                         break;
 594                 }
 595                 xfs_trans_ijoin(tp, ip, 0);
 596
 597                 xfs_defer_init(&free_list, &firstfsb);
 598                 error = xfs_bmapi_write(tp, ip, startoffset_fsb, allocatesize_fsb,
 599                                 xfs_bmapi_flags, &firstfsb, 0, imapp,
 600                                 &reccount, &free_list);
 601
 602                 if (error)
 603                         goto error0;
 604
 605                 /*
 606                  * Complete the transaction
 607                  */
 608                 error = xfs_defer_finish(&tp, &free_list);
 609                 if (error)
 610                         goto error0;
 611
 612                 error = xfs_trans_commit(tp);
 613                 if (error)
 614                         break;
 615
 616                 allocated_fsb = imapp->br_blockcount;
 617                 if (reccount == 0)
 618                         return -ENOSPC;
 619
 620                 startoffset_fsb += allocated_fsb;
 621                 allocatesize_fsb -= allocated_fsb;
 622         }
 623         return error;
 624
 625 error0: /* Cancel bmap, cancel trans */
 626         xfs_defer_cancel(&free_list);
 627         xfs_trans_cancel(tp);
 628         return error;
 629 }
 630
 631 /*
 632  * Wrapper around call to libxfs_ialloc. Takes care of committing and
 633  * allocating a new transaction as needed.
 634  *
 635  * Originally there were two copies of this code - one in mkfs, the
 636  * other in repair - now there is just the one.
 637  */
 638 int
 639 libxfs_inode_alloc(
 640         xfs_trans_t     **tp,
 641         xfs_inode_t     *pip,
 642         mode_t          mode,
 643         nlink_t         nlink,
 644         xfs_dev_t       rdev,
 645         struct cred     *cr,
 646         struct fsxattr  *fsx,
 647         xfs_inode_t     **ipp)
 648 {
 649         xfs_buf_t       *ialloc_context;
 650         xfs_inode_t     *ip;
 651         int             error;
 652
 653         ialloc_context = (xfs_buf_t *)0;
 654         error = libxfs_ialloc(*tp, pip, mode, nlink, rdev, cr, fsx,
 655                            &ialloc_context, &ip);
 656         if (error) {
 657                 *ipp = NULL;
 658                 return error;
 659         }
 660         if (!ialloc_context && !ip) {
 661                 *ipp = NULL;
 662                 return -ENOSPC;
 663         }
 664
 665         if (ialloc_context) {
 666
 667                 xfs_trans_bhold(*tp, ialloc_context);
 668
 669                 error = xfs_trans_roll(tp);
 670                 if (error) {
 671                         fprintf(stderr, _("%s: cannot duplicate transaction: %s\n"),
 672                                 progname, strerror(error));
 673                         exit(1);
 674                 }
 675                 xfs_trans_bjoin(*tp, ialloc_context);
 676                 error = libxfs_ialloc(*tp, pip, mode, nlink, rdev, cr,
 677                                    fsx, &ialloc_context, &ip);
 678                 if (!ip)
 679                         error = -ENOSPC;
 680                 if (error)
 681                         return error;
 682         }
 683
 684         *ipp = ip;
 685         return error;
 686 }
 687
 688 /*
 689  * Userspace versions of common diagnostic routines (varargs fun).
 690  */
 691 void
 692 libxfs_fs_repair_cmn_err(int level, xfs_mount_t *mp, char *fmt, ...)
 693 {
 694         va_list ap;
 695
 696         va_start(ap, fmt);
 697         vfprintf(stderr, fmt, ap);
 698         fprintf(stderr, "  This is a bug.\n");
 699         fprintf(stderr, "%s version %s\n", progname, VERSION);
 700         fprintf(stderr,
 701                 "Please capture the filesystem metadata with xfs_metadump and\n"
 702                 "report it to linux-xfs@vger.kernel.org\n");
 703         va_end(ap);
 704 }
 705
 706 void
 707 libxfs_fs_cmn_err(int level, xfs_mount_t *mp, char *fmt, ...)
 708 {
 709         va_list ap;
 710
 711         va_start(ap, fmt);
 712         vfprintf(stderr, fmt, ap);
 713         fputs("\n", stderr);
 714         va_end(ap);
 715 }
 716
 717 void
 718 cmn_err(int level, char *fmt, ...)
 719 {
 720         va_list ap;
 721
 722         va_start(ap, fmt);
 723         vfprintf(stderr, fmt, ap);
 724         fputs("\n", stderr);
 725         va_end(ap);
 726 }
 727
 728 /*
 729  * Warnings specifically for verifier errors.  Differentiate CRC vs. invalid
 730  * values, and omit the stack trace unless the error level is tuned high.
 731  */
 732 void
 733 xfs_verifier_error(
 734         struct xfs_buf          *bp,
 735         int                     error,
 736         xfs_failaddr_t          failaddr)
 737 {
 738         xfs_buf_ioerror(bp, error);
 739
 740         xfs_alert(NULL, "Metadata %s detected at %p, %s block 0x%llx/0x%x",
 741                   bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
 742                   failaddr ? failaddr : __return_address,
 743                   bp->b_ops->name, bp->b_bn, BBTOB(bp->b_length));
 744 }
 745
 746 /*
 747  * This is called from I/O verifiers on v5 superblock filesystems. In the
 748  * kernel, it validates the metadata LSN parameter against the current LSN of
 749  * the active log. We don't have an active log in userspace so this kind of
 750  * validation is not required. Therefore, this function always returns true in
 751  * userspace.
 752  *
 753  * xfs_repair piggybacks off this mechanism to help track the largest metadata
 754  * LSN in use on a filesystem. Keep a record of the largest LSN seen such that
 755  * repair can validate it against the state of the log.
 756  */
 757 xfs_lsn_t       libxfs_max_lsn = 0;
 758 pthread_mutex_t libxfs_max_lsn_lock = PTHREAD_MUTEX_INITIALIZER;
 759
 760 bool
 761 xfs_log_check_lsn(
 762         struct xfs_mount        *mp,
 763         xfs_lsn_t               lsn)
 764 {
 765         int                     cycle = CYCLE_LSN(lsn);
 766         int                     block = BLOCK_LSN(lsn);
 767         int                     max_cycle;
 768         int                     max_block;
 769
 770         if (lsn == NULLCOMMITLSN)
 771                 return true;
 772
 773         pthread_mutex_lock(&libxfs_max_lsn_lock);
 774
 775         max_cycle = CYCLE_LSN(libxfs_max_lsn);
 776         max_block = BLOCK_LSN(libxfs_max_lsn);
 777
 778         if ((cycle > max_cycle) ||
 779             (cycle == max_cycle && block > max_block))
 780                 libxfs_max_lsn = lsn;
 781
 782         pthread_mutex_unlock(&libxfs_max_lsn_lock);
 783
 784         return true;
 785 }
 786
 787 static struct xfs_buftarg *
 788 xfs_find_bdev_for_inode(
 789         struct xfs_inode        *ip)
 790 {
 791         struct xfs_mount        *mp = ip->i_mount;
 792
 793         if (XFS_IS_REALTIME_INODE(ip))
 794                 return mp->m_rtdev_targp;
 795         return mp->m_ddev_targp;
 796 }
 797
 798 static xfs_daddr_t
 799 xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
 800 {
 801         if (XFS_IS_REALTIME_INODE(ip))
 802                  return XFS_FSB_TO_BB(ip->i_mount, fsb);
 803         return XFS_FSB_TO_DADDR(ip->i_mount, (fsb));
 804 }
 805
 806 int
 807 libxfs_zero_extent(
 808         struct xfs_inode *ip,
 809         xfs_fsblock_t   start_fsb,
 810         xfs_off_t       count_fsb)
 811 {
 812         xfs_daddr_t     sector = xfs_fsb_to_db(ip, start_fsb);
 813         ssize_t         size = XFS_FSB_TO_BB(ip->i_mount, count_fsb);
 814
 815         return libxfs_device_zero(xfs_find_bdev_for_inode(ip), sector, size);
 816 }
 817
 818 unsigned int
 819 hweight8(unsigned int w)
 820 {
 821         unsigned int res = w - ((w >> 1) & 0x55);
 822         res = (res & 0x33) + ((res >> 2) & 0x33);
 823         return (res + (res >> 4)) & 0x0F;
 824 }