// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2001,2005-2006 Silicon Graphics, Inc.
 * Copyright (C) 2010 Red Hat, Inc.
 * All Rights Reserved.
 */

#include "libxfs_priv.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode_buf.h"
#include "xfs_inode_fork.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_defer.h"
#include "xfs_trace.h"
#include "xfs_rtbitmap.h"

static void xfs_trans_free_items(struct xfs_trans *tp);
STATIC struct xfs_trans *xfs_trans_dup(struct xfs_trans *tp);
static int xfs_trans_reserve(struct xfs_trans *tp, struct xfs_trans_res *resp,
		uint blocks, uint rtextents);
static int __xfs_trans_commit(struct xfs_trans *tp, bool regrant);

/*
 * Simple transaction interface
 */

struct kmem_cache	*xfs_trans_cache;

/*
 * Initialize the precomputed transaction reservation values
 * in the mount structure.
 */
void
libxfs_trans_init(
	struct xfs_mount	*mp)
{
	xfs_trans_resv_calc(mp, &mp->m_resv);
}

/*
 * Add the given log item to the transaction's list of log items.
 */
void
libxfs_trans_add_item(
	struct xfs_trans	*tp,
	struct xfs_log_item	*lip)
{
	ASSERT(lip->li_mountp == tp->t_mountp);
	ASSERT(lip->li_ailp == tp->t_mountp->m_ail);
	ASSERT(list_empty(&lip->li_trans));
	ASSERT(!test_bit(XFS_LI_DIRTY, &lip->li_flags));

	list_add_tail(&lip->li_trans, &tp->t_items);
}

/*
 * Unlink the given log item from the transaction.
 */
void
libxfs_trans_del_item(
	struct xfs_log_item	*lip)
{
	clear_bit(XFS_LI_DIRTY, &lip->li_flags);
	list_del_init(&lip->li_trans);
}

/*
 * Roll from one trans in the sequence of PERMANENT transactions to
 * the next: permanent transactions are only flushed out when
 * committed with XFS_TRANS_RELEASE_LOG_RES, but we still want to let
 * chunks of it go to the log as soon as possible. So we commit the
 * chunk we've been working on and get a new transaction to continue.
 */
int
libxfs_trans_roll(
	struct xfs_trans	**tpp)
{
	struct xfs_trans	*trans = *tpp;
	struct xfs_trans_res	tres;
	int			error;

	/*
	 * Copy the critical parameters from one trans to the next.
	 */
	tres.tr_logres = trans->t_log_res;
	tres.tr_logcount = trans->t_log_count;

	*tpp = xfs_trans_dup(trans);

	/*
	 * Commit the current transaction.
	 * If the commit fails, it will just unlock the items that are
	 * marked to be released; that also means a filesystem shutdown
	 * is in progress. The caller is responsible for cancelling the
	 * duplicate transaction that is returned.
	 */
	error = __xfs_trans_commit(trans, true);
	if (error)
		return error;

	/*
	 * Reserve space in the log for the next transaction.
	 * This also pushes items in the "AIL", the list of logged items,
	 * out to disk if they are taking up space at the tail of the log
	 * that we want to use. This requires that either nothing be locked
	 * across this call, or that anything that is locked be logged in
	 * the prior and the next transactions.
	 */
	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
	return xfs_trans_reserve(*tpp, &tres, 0, 0);
}

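/*
 * Illustrative sketch (not part of the original source): a caller that
 * needs to keep a buffer locked across a roll typically holds it over
 * the commit and rejoins it afterwards.  "tp", "bp" and "error" are
 * hypothetical.
 *
 *	libxfs_trans_bhold(tp, bp);
 *	error = libxfs_trans_roll(&tp);
 *	if (error)
 *		return error;
 *	libxfs_trans_bjoin(tp, bp);
 */
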
/*
 * Free the transaction structure. If there is more clean up
 * to do when the structure is freed, add it here.
 */
static void
xfs_trans_free(
	struct xfs_trans	*tp)
{
	kmem_cache_free(xfs_trans_cache, tp);
}

/*
 * This is called to create a new transaction which will share the
 * permanent log reservation of the given transaction. The remaining
 * unused block and rt extent reservations are also inherited. This
 * implies that the original transaction is no longer allowed to allocate
 * blocks. Locks and log items, however, are not inherited. They must
 * be added to the new transaction explicitly.
 */
STATIC struct xfs_trans *
xfs_trans_dup(
	struct xfs_trans	*tp)
{
	struct xfs_trans	*ntp;

	ntp = kmem_cache_zalloc(xfs_trans_cache, 0);

	/*
	 * Initialize the new transaction structure.
	 */
	ntp->t_mountp = tp->t_mountp;
	INIT_LIST_HEAD(&ntp->t_items);
	INIT_LIST_HEAD(&ntp->t_dfops);
	ntp->t_highest_agno = NULLAGNUMBER;

	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);

	ntp->t_flags = XFS_TRANS_PERM_LOG_RES |
		       (tp->t_flags & XFS_TRANS_RESERVE) |
		       (tp->t_flags & XFS_TRANS_NO_WRITECOUNT);
	/* We gave our writer reference to the new transaction */
	tp->t_flags |= XFS_TRANS_NO_WRITECOUNT;

	ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;
	tp->t_blk_res = tp->t_blk_res_used;

	/* move deferred ops over to the new tp */
	xfs_defer_move(ntp, tp);

	return ntp;
}

/*
 * This is called to reserve free disk blocks and log space for the
 * given transaction. This must be done before allocating any resources
 * within the transaction.
 *
 * This will return ENOSPC if there are not enough blocks available.
 * It will sleep waiting for available log space.
 * The only valid value for the reservation's flags is
 * XFS_TRANS_PERM_LOG_RES, which is used by long running transactions.
 * If any one of the reservations fails then they will all be backed out.
 *
 * This does not do quota reservations. That typically is done by the
 * caller afterwards.
 */
static int
xfs_trans_reserve(
	struct xfs_trans	*tp,
	struct xfs_trans_res	*resp,
	uint			blocks,
	uint			rtextents)
{
	int			error = 0;

	/*
	 * Attempt to reserve the needed disk blocks by decrementing
	 * the number needed from the number available. This will
	 * fail if the count would go below zero.
	 */
	if (blocks > 0) {
		if (tp->t_mountp->m_sb.sb_fdblocks < blocks)
			return -ENOSPC;
		tp->t_blk_res += blocks;
	}

	/*
	 * Reserve the log space needed for this transaction.
	 */
	if (resp->tr_logres > 0) {
		ASSERT(tp->t_log_res == 0 ||
		       tp->t_log_res == resp->tr_logres);
		ASSERT(tp->t_log_count == 0 ||
		       tp->t_log_count == resp->tr_logcount);

		if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES)
			tp->t_flags |= XFS_TRANS_PERM_LOG_RES;
		else
			ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES));

		tp->t_log_res = resp->tr_logres;
		tp->t_log_count = resp->tr_logcount;
	}

	/*
	 * Attempt to reserve the needed realtime extents by decrementing
	 * the number needed from the number available. This will
	 * fail if the count would go below zero.
	 */
	if (rtextents > 0) {
		if (tp->t_mountp->m_sb.sb_rextents < rtextents) {
			error = -ENOSPC;
			goto undo_blocks;
		}
		tp->t_rtx_res += rtextents;
	}

	return 0;

	/*
	 * Error cases jump to one of these labels to undo any
	 * reservations which have already been performed.
	 */
undo_blocks:
	if (blocks > 0)
		tp->t_blk_res = 0;

	return error;
}

int
libxfs_trans_alloc(
	struct xfs_mount	*mp,
	struct xfs_trans_res	*resp,
	unsigned int		blocks,
	unsigned int		rtextents,
	unsigned int		flags,
	struct xfs_trans	**tpp)
{
	struct xfs_trans	*tp;
	int			error;

	tp = kmem_cache_zalloc(xfs_trans_cache, 0);
	tp->t_mountp = mp;
	INIT_LIST_HEAD(&tp->t_items);
	INIT_LIST_HEAD(&tp->t_dfops);
	tp->t_highest_agno = NULLAGNUMBER;

	error = xfs_trans_reserve(tp, resp, blocks, rtextents);
	if (error) {
		xfs_trans_cancel(tp);
		return error;
	}

	trace_xfs_trans_alloc(tp, _RET_IP_);

	*tpp = tp;
	return 0;
}

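/*
 * Illustrative sketch (not from the original source; assumes the
 * tr_ichange reservation): the usual allocate/modify/commit cycle.
 * Every successful allocation must be answered by exactly one commit
 * or cancel.
 *
 *	struct xfs_trans	*tp;
 *	int			error;
 *
 *	error = libxfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
 *	if (error)
 *		return error;
 *	... join and dirty some items ...
 *	return libxfs_trans_commit(tp);
 */
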
/*
 * Create an empty transaction with no reservation. This is a defensive
 * mechanism for routines that query metadata without actually modifying
 * them -- if the metadata being queried is somehow cross-linked (think a
 * btree block pointer that points higher in the tree), we risk deadlock.
 * However, blocks grabbed as part of a transaction can be re-grabbed.
 * The verifiers will notice the corrupt block and the operation will fail
 * back to userspace without deadlocking.
 *
 * Note the zero-length reservation; this transaction MUST be cancelled
 * without any dirty data.
 */
int
libxfs_trans_alloc_empty(
	struct xfs_mount	*mp,
	struct xfs_trans	**tpp)
{
	struct xfs_trans_res	resv = {0};

	return xfs_trans_alloc(mp, &resv, 0, 0, XFS_TRANS_NO_WRITECOUNT, tpp);
}

/*
 * Allocate a transaction that can be rolled. Since userspace doesn't have
 * a need for log reservations, we really only need tr_itruncate to get the
 * permanent log reservation flag to avoid blowing asserts.
 */
int
libxfs_trans_alloc_rollable(
	struct xfs_mount	*mp,
	unsigned int		blocks,
	struct xfs_trans	**tpp)
{
	return libxfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, blocks,
			0, 0, tpp);
}

void
libxfs_trans_cancel(
	struct xfs_trans	*tp)
{
	bool			dirty;

	trace_xfs_trans_cancel(tp, _RET_IP_);

	if (tp == NULL)
		return;
	dirty = (tp->t_flags & XFS_TRANS_DIRTY);

	/*
	 * It's never valid to cancel a transaction with deferred ops attached,
	 * because the transaction is effectively dirty. Complain about this
	 * loudly before freeing the in-memory defer items.
	 */
	if (!list_empty(&tp->t_dfops)) {
		ASSERT(list_empty(&tp->t_dfops));
		ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
		dirty = true;
		xfs_defer_cancel(tp);
	}

	if (dirty) {
		fprintf(stderr, _("Cancelling dirty transaction!\n"));
		abort();
	}

	xfs_trans_free_items(tp);
	xfs_trans_free(tp);
}

static void
xfs_buf_item_put(
	struct xfs_buf_log_item	*bip)
{
	struct xfs_buf		*bp = bip->bli_buf;

	bp->b_log_item = NULL;
	kmem_cache_free(xfs_buf_item_cache, bip);
}

/* from xfs_trans_buf.c */

/*
 * Add the locked buffer to the transaction.
 *
 * The buffer must be locked, and it cannot be associated with any
 * transaction.
 *
 * If the buffer does not yet have a buf log item associated with it,
 * then allocate one for it. Then add the buf item to the transaction.
 */
STATIC void
_libxfs_trans_bjoin(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp,
	int			reset_recur)
{
	struct xfs_buf_log_item	*bip;

	ASSERT(bp->b_transp == NULL);

	/*
	 * The xfs_buf_log_item pointer is stored in b_log_item. If
	 * it doesn't have one yet, then allocate one and initialize it.
	 * The checks to see if one is there are in xfs_buf_item_init().
	 */
	xfs_buf_item_init(bp, tp->t_mountp);
	bip = bp->b_log_item;
	if (reset_recur)
		bip->bli_recur = 0;

	/*
	 * Attach the item to the transaction so we can find it in
	 * xfs_trans_get_buf() and friends.
	 */
	xfs_trans_add_item(tp, &bip->bli_item);
	bp->b_transp = tp;
}

void
libxfs_trans_bjoin(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp)
{
	_libxfs_trans_bjoin(tp, bp, 0);
	trace_xfs_trans_bjoin(bp->b_log_item);
}

/*
 * Cancel the previous buffer hold request made on this buffer
 * for this transaction.
 */
void
libxfs_trans_bhold_release(
	xfs_trans_t		*tp,
	struct xfs_buf		*bp)
{
	struct xfs_buf_log_item	*bip = bp->b_log_item;

	ASSERT(bp->b_transp == tp);
	ASSERT(bip != NULL);

	bip->bli_flags &= ~XFS_BLI_HOLD;
	trace_xfs_trans_bhold_release(bip);
}

/*
 * Get and lock the buffer for the caller if it is not already
 * locked within the given transaction. If it is already locked
 * within the transaction, just increment its lock recursion count
 * and return a pointer to it.
 *
 * If the transaction pointer is NULL, make this just a normal
 * get_buf() call.
 */
int
libxfs_trans_get_buf_map(
	struct xfs_trans	*tp,
	struct xfs_buftarg	*target,
	struct xfs_buf_map	*map,
	int			nmaps,
	xfs_buf_flags_t		flags,
	struct xfs_buf		**bpp)
{
	struct xfs_buf		*bp;
	struct xfs_buf_log_item	*bip;
	int			error;

	*bpp = NULL;
	if (!tp)
		return libxfs_buf_get_map(target, map, nmaps, 0, bpp);

	/*
	 * If we find the buffer in the cache with this transaction
	 * pointer in its b_transp field, then we know we already
	 * have it locked. In this case we just increment the lock
	 * recursion count and return the buffer to the caller.
	 */
	bp = xfs_trans_buf_item_match(tp, target, map, nmaps);
	if (bp != NULL) {
		ASSERT(bp->b_transp == tp);
		bip = bp->b_log_item;
		ASSERT(bip != NULL);
		bip->bli_recur++;
		trace_xfs_trans_get_buf_recur(bip);
		*bpp = bp;
		return 0;
	}

	error = libxfs_buf_get_map(target, map, nmaps, 0, &bp);
	if (error)
		return error;

	ASSERT(!bp->b_error);

	_libxfs_trans_bjoin(tp, bp, 1);
	trace_xfs_trans_get_buf(bp->b_log_item);
	*bpp = bp;
	return 0;
}

struct xfs_buf *
libxfs_trans_getsb(
	struct xfs_trans	*tp)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_buf		*bp;
	struct xfs_buf_log_item	*bip;
	int			len = XFS_FSS_TO_BB(mp, 1);
	DEFINE_SINGLE_BUF_MAP(map, XFS_SB_DADDR, len);

	if (tp == NULL)
		return libxfs_getsb(mp);

	bp = xfs_trans_buf_item_match(tp, mp->m_ddev_targp, &map, 1);
	if (bp != NULL) {
		ASSERT(bp->b_transp == tp);
		bip = bp->b_log_item;
		ASSERT(bip != NULL);
		bip->bli_recur++;
		trace_xfs_trans_getsb_recur(bip);
		return bp;
	}

	bp = libxfs_getsb(mp);
	if (bp == NULL)
		return NULL;

	_libxfs_trans_bjoin(tp, bp, 1);
	trace_xfs_trans_getsb(bp->b_log_item);
	return bp;
}

int
libxfs_trans_read_buf_map(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_buftarg	*target,
	struct xfs_buf_map	*map,
	int			nmaps,
	xfs_buf_flags_t		flags,
	struct xfs_buf		**bpp,
	const struct xfs_buf_ops *ops)
{
	struct xfs_buf		*bp;
	struct xfs_buf_log_item	*bip;
	int			error;

	*bpp = NULL;

	if (tp == NULL)
		return libxfs_buf_read_map(target, map, nmaps, flags, bpp, ops);

	bp = xfs_trans_buf_item_match(tp, target, map, nmaps);
	if (bp) {
		ASSERT(bp->b_transp == tp);
		ASSERT(bp->b_log_item != NULL);
		bip = bp->b_log_item;
		bip->bli_recur++;
		trace_xfs_trans_read_buf_recur(bip);
		goto done;
	}

	error = libxfs_buf_read_map(target, map, nmaps, flags, &bp, ops);
	if (error)
		return error;

	_libxfs_trans_bjoin(tp, bp, 1);
done:
	trace_xfs_trans_read_buf(bp->b_log_item);
	*bpp = bp;
	return 0;
}

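/*
 * Illustrative sketch (hypothetical caller): read one filesystem block
 * within a transaction.  A second read of the same block from the same
 * transaction returns the already-locked buffer via the recursion path
 * above.  "blkno", "bp" and "ops" are assumptions.
 *
 *	DEFINE_SINGLE_BUF_MAP(map, blkno, mp->m_bsize);
 *
 *	error = libxfs_trans_read_buf_map(mp, tp, mp->m_ddev_targp, &map,
 *			1, 0, &bp, ops);
 */
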
/*
 * Release a buffer previously joined to the transaction. If the buffer is
 * modified within this transaction, decrement the recursion count but do
 * not release the buffer even if the count goes to 0. If the buffer is not
 * modified within the transaction, decrement the recursion count and
 * release the buffer if the recursion count goes to 0.
 *
 * If the buffer is to be released and it was not already dirty before this
 * transaction began, then also free the buf_log_item associated with it.
 *
 * If the transaction pointer is NULL, this is a normal xfs_buf_relse() call.
 */
void
libxfs_trans_brelse(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp)
{
	struct xfs_buf_log_item	*bip = bp->b_log_item;

	ASSERT(bp->b_transp == tp);

	if (!tp) {
		libxfs_buf_relse(bp);
		return;
	}

	trace_xfs_trans_brelse(bip);
	ASSERT(bip->bli_item.li_type == XFS_LI_BUF);

	/*
	 * If the release is for a recursive lookup, then decrement the count
	 * and return.
	 */
	if (bip->bli_recur > 0) {
		bip->bli_recur--;
		return;
	}

	/*
	 * If the buffer is invalidated or dirty in this transaction, we can't
	 * release it until we commit.
	 */
	if (test_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags))
		return;
	if (bip->bli_flags & XFS_BLI_STALE)
		return;

	/*
	 * Unlink the log item from the transaction and clear the hold flag, if
	 * set. We wouldn't want the next user of the buffer to get confused.
	 */
	xfs_trans_del_item(&bip->bli_item);
	bip->bli_flags &= ~XFS_BLI_HOLD;

	/* drop the reference to the bli */
	xfs_buf_item_put(bip);

	bp->b_transp = NULL;
	libxfs_buf_relse(bp);
}

/*
 * Mark the buffer as not needing to be unlocked when the buf item's
 * iop_unlock() routine is called. The buffer must already be locked
 * and associated with the given transaction.
 */
/* ARGSUSED */
void
libxfs_trans_bhold(
	xfs_trans_t		*tp,
	struct xfs_buf		*bp)
{
	struct xfs_buf_log_item	*bip = bp->b_log_item;

	ASSERT(bp->b_transp == tp);
	ASSERT(bip != NULL);

	bip->bli_flags |= XFS_BLI_HOLD;
	trace_xfs_trans_bhold(bip);
}

/*
 * Mark a buffer dirty in the transaction.
 */
void
libxfs_trans_dirty_buf(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp)
{
	struct xfs_buf_log_item	*bip = bp->b_log_item;

	ASSERT(bp->b_transp == tp);
	ASSERT(bip != NULL);

	tp->t_flags |= XFS_TRANS_DIRTY;
	set_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags);
}

/*
 * This is called to mark bytes first through last inclusive of the given
 * buffer as needing to be logged when the transaction is committed.
 * The buffer must already be associated with the given transaction.
 *
 * First and last are numbers relative to the beginning of this buffer,
 * so the first byte in the buffer is numbered 0 regardless of the
 * value of b_blkno.
 */
void
libxfs_trans_log_buf(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp,
	uint			first,
	uint			last)
{
	struct xfs_buf_log_item	*bip = bp->b_log_item;

	ASSERT(first <= last && last < BBTOB(bp->b_length));

	xfs_trans_dirty_buf(tp, bp);

	trace_xfs_trans_log_buf(bip);
	xfs_buf_item_log(bip, first, last);
}

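/*
 * Example (illustrative, hypothetical offsets): log only the bytes that
 * changed, expressed relative to the start of the buffer.
 *
 *	libxfs_trans_log_buf(tp, bp, offsetof(struct xfs_dsb, sb_icount),
 *			offsetof(struct xfs_dsb, sb_icount) +
 *			sizeof(__be64) - 1);
 */
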
void
libxfs_trans_binval(
	xfs_trans_t		*tp,
	struct xfs_buf		*bp)
{
	struct xfs_buf_log_item	*bip = bp->b_log_item;

	ASSERT(bp->b_transp == tp);
	ASSERT(bip != NULL);

	trace_xfs_trans_binval(bip);

	if (bip->bli_flags & XFS_BLI_STALE)
		return;
	XFS_BUF_UNDELAYWRITE(bp);
	xfs_buf_stale(bp);

	bip->bli_flags |= XFS_BLI_STALE;
	bip->bli_flags &= ~XFS_BLI_DIRTY;
	bip->__bli_format.blf_flags &= ~XFS_BLF_INODE_BUF;
	bip->__bli_format.blf_flags |= XFS_BLF_CANCEL;
	set_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags);
	tp->t_flags |= XFS_TRANS_DIRTY;
}

/*
 * Mark the buffer as being one which contains newly allocated
 * inodes. We need to make sure that even if this buffer is
 * relogged as an 'inode buf' we still recover all of the inode
 * images in the face of a crash. This works in coordination with
 * xfs_buf_item_committed() to ensure that the buffer remains in the
 * AIL at its original location even after it has been relogged.
 */
/* ARGSUSED */
void
libxfs_trans_inode_alloc_buf(
	xfs_trans_t		*tp,
	struct xfs_buf		*bp)
{
	struct xfs_buf_log_item	*bip = bp->b_log_item;

	ASSERT(bp->b_transp == tp);
	ASSERT(bip != NULL);
	bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
}

/*
 * For userspace, ordered buffers just need to be marked dirty so
 * the transaction commit will write them and mark them up-to-date.
 * In essence, they are just like any other logged buffer in userspace.
 *
 * If the buffer is already dirty, trigger the "already logged" return
 * condition.
 */
bool
libxfs_trans_ordered_buf(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp)
{
	struct xfs_buf_log_item	*bip = bp->b_log_item;
	bool			ret;

	ret = test_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags);
	libxfs_trans_log_buf(tp, bp, 0, BBTOB(bp->b_length) - 1);
	return ret;
}

/* end of xfs_trans_buf.c */

/*
 * Record the indicated change to the given field for application
 * to the file system's superblock when the transaction commits.
 * For now, just store the change in the transaction structure.
 * Mark the transaction structure to indicate that the superblock
 * needs to be updated before committing.
 *
 * Originally derived from xfs_trans_mod_sb().
 */
void
libxfs_trans_mod_sb(
	xfs_trans_t		*tp,
	uint			field,
	long			delta)
{
	switch (field) {
	case XFS_TRANS_SB_RES_FDBLOCKS:
		return;
	case XFS_TRANS_SB_FDBLOCKS:
		if (delta < 0) {
			tp->t_blk_res_used += (uint)-delta;
			if (tp->t_blk_res_used > tp->t_blk_res) {
				fprintf(stderr,
_("Transaction block reservation exceeded! %u > %u\n"),
					tp->t_blk_res_used, tp->t_blk_res);
				ASSERT(0);
			}
		}
		tp->t_fdblocks_delta += delta;
		break;
	case XFS_TRANS_SB_ICOUNT:
		ASSERT(delta > 0);
		tp->t_icount_delta += delta;
		break;
	case XFS_TRANS_SB_IFREE:
		tp->t_ifree_delta += delta;
		break;
	case XFS_TRANS_SB_FREXTENTS:
		/*
		 * Track the number of rt extents allocated in the transaction.
		 * Make sure it does not exceed the number reserved.
		 */
		if (delta < 0) {
			tp->t_rtx_res_used += (uint)-delta;
			if (tp->t_rtx_res_used > tp->t_rtx_res) {
				fprintf(stderr,
_("Transaction rt block reservation exceeded! %u > %u\n"),
					tp->t_rtx_res_used, tp->t_rtx_res);
				ASSERT(0);
			}
		}
		tp->t_frextents_delta += delta;
		break;
	default:
		ASSERT(0);
		return;
	}
	tp->t_flags |= (XFS_TRANS_SB_DIRTY | XFS_TRANS_DIRTY);
}

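/*
 * Example (illustrative, not from the original source): a caller that
 * allocates ten blocks out of its reservation accounts for them with a
 * negative delta, which is charged against t_blk_res above.
 *
 *	libxfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, -10L);
 */
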
static void
xfs_inode_item_put(
	struct xfs_inode_log_item	*iip)
{
	struct xfs_inode	*ip = iip->ili_inode;

	ASSERT(iip->ili_item.li_buf == NULL);

	ip->i_itemp = NULL;

	list_del_init(&iip->ili_item.li_bio_list);
	kmem_cache_free(xfs_ili_cache, iip);
}

/*
 * Transaction commit code follows (i.e. write to disk in libxfs)
 *
 * XXX (dgc): should failure to flush the inode (e.g. due to uncorrected
 * corruption) result in transaction commit failure w/ EFSCORRUPTED?
 */
static void
inode_item_done(
	struct xfs_inode_log_item	*iip)
{
	struct xfs_buf		*bp;
	int			error;

	ASSERT(iip->ili_inode != NULL);

	if (!(iip->ili_fields & XFS_ILOG_ALL))
		goto free_item;

	bp = iip->ili_item.li_buf;
	iip->ili_item.li_buf = NULL;

	/*
	 * Flush the inode and disassociate it from the transaction regardless
	 * of whether the flush succeeds or not. If we fail the flush, make
	 * sure we still release the buffer reference we currently hold.
	 */
	error = libxfs_iflush_int(iip->ili_inode, bp);
	bp->b_transp = NULL;	/* remove xact ptr */

	if (error) {
		fprintf(stderr, _("%s: warning - iflush_int failed (%d)\n"),
			progname, error);
		goto free;
	}

	libxfs_buf_mark_dirty(bp);
free:
	libxfs_buf_relse(bp);
free_item:
	xfs_inode_item_put(iip);
}

static void
buf_item_done(
	xfs_buf_log_item_t	*bip)
{
	struct xfs_buf		*bp;
	int			hold;
	extern struct kmem_cache	*xfs_buf_item_cache;

	bp = bip->bli_buf;
	ASSERT(bp != NULL);
	bp->b_transp = NULL;	/* remove xact ptr */

	hold = (bip->bli_flags & XFS_BLI_HOLD);
	if (bip->bli_flags & XFS_BLI_DIRTY)
		libxfs_buf_mark_dirty(bp);

	bip->bli_flags &= ~XFS_BLI_HOLD;
	xfs_buf_item_put(bip);
	if (hold)
		return;
	libxfs_buf_relse(bp);
}

static void
trans_committed(
	xfs_trans_t		*tp)
{
	struct xfs_log_item	*lip, *next;

	list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
		xfs_trans_del_item(lip);

		if (lip->li_type == XFS_LI_BUF)
			buf_item_done((xfs_buf_log_item_t *)lip);
		else if (lip->li_type == XFS_LI_INODE)
			inode_item_done((struct xfs_inode_log_item *)lip);
		else {
			fprintf(stderr, _("%s: unrecognised log item type\n"),
				progname);
			ASSERT(0);
		}
	}
}

static void
buf_item_unlock(
	xfs_buf_log_item_t	*bip)
{
	struct xfs_buf		*bp = bip->bli_buf;
	uint			hold;

	/* Clear the buffer's association with this transaction. */
	bip->bli_buf->b_transp = NULL;

	hold = bip->bli_flags & XFS_BLI_HOLD;
	bip->bli_flags &= ~XFS_BLI_HOLD;
	xfs_buf_item_put(bip);
	if (!hold)
		libxfs_buf_relse(bp);
}

static void
inode_item_unlock(
	struct xfs_inode_log_item	*iip)
{
	xfs_inode_item_put(iip);
}

/* Detach and unlock all of the items in a transaction */
static void
xfs_trans_free_items(
	struct xfs_trans	*tp)
{
	struct xfs_log_item	*lip, *next;

	list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
		xfs_trans_del_item(lip);
		if (lip->li_type == XFS_LI_BUF)
			buf_item_unlock((xfs_buf_log_item_t *)lip);
		else if (lip->li_type == XFS_LI_INODE)
			inode_item_unlock((struct xfs_inode_log_item *)lip);
		else {
			fprintf(stderr, _("%s: unrecognised log item type\n"),
				progname);
			ASSERT(0);
		}
	}
}

/*
 * Sort transaction items prior to running precommit operations. This will
 * attempt to order the items such that they will always be locked in the
 * same order. Items that have no sort function are moved to the end of the
 * list and so are locked last.
 *
 * This may need refinement as different types of objects add sort functions.
 *
 * Function is more complex than it needs to be because we are comparing
 * 64 bit values and the function only returns 32 bit values.
 */
static int
xfs_trans_precommit_sort(
	void			*unused_arg,
	const struct list_head	*a,
	const struct list_head	*b)
{
	struct xfs_log_item	*lia = container_of(a,
					struct xfs_log_item, li_trans);
	struct xfs_log_item	*lib = container_of(b,
					struct xfs_log_item, li_trans);
	int64_t			diff;

	/*
	 * If both items are non-sortable, leave them alone. If only one is
	 * sortable, move the non-sortable item towards the end of the list.
	 */
	if (!lia->li_ops->iop_sort && !lib->li_ops->iop_sort)
		return 0;
	if (!lia->li_ops->iop_sort)
		return 1;
	if (!lib->li_ops->iop_sort)
		return -1;

	diff = lia->li_ops->iop_sort(lia) - lib->li_ops->iop_sort(lib);
	if (diff < 0)
		return -1;
	if (diff > 0)
		return 1;
	return 0;
}

/*
 * Run transaction precommit functions.
 *
 * If there is an error in any of the callouts, then stop immediately and
 * trigger a shutdown to abort the transaction. There is no recovery
 * possible from errors at this point as the transaction is dirty....
 */
static int
xfs_trans_run_precommits(
	struct xfs_trans	*tp)
{
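	/*
	 * The mount pointer is commented out below because the userspace
	 * xfs_force_shutdown() stub does not evaluate its arguments.
	 */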
	//struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_log_item	*lip, *n;
	int			error = 0;

	/*
	 * Sort the item list to avoid ABBA deadlocks with other transactions
	 * running precommit operations that lock multiple shared items such
	 * as inode cluster buffers.
	 */
	list_sort(NULL, &tp->t_items, xfs_trans_precommit_sort);

	/*
	 * Precommit operations can remove the log item from the transaction
	 * if the log item exists purely to delay modifications until they
	 * can be ordered against other operations. Hence we have to use
	 * list_for_each_entry_safe() here.
	 */
	list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) {
		if (!test_bit(XFS_LI_DIRTY, &lip->li_flags))
			continue;
		if (lip->li_ops->iop_precommit) {
			error = lip->li_ops->iop_precommit(tp, lip);
			if (error)
				break;
		}
	}
	if (error)
		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
	return error;
}

/*
 * Commit the changes represented by this transaction
 */
static int
__xfs_trans_commit(
	struct xfs_trans	*tp,
	bool			regrant)
{
	struct xfs_sb		*sbp;
	int			error = 0;

	trace_xfs_trans_commit(tp, _RET_IP_);

	if (tp == NULL)
		return 0;

	error = xfs_trans_run_precommits(tp);
	if (error) {
		if (tp->t_flags & XFS_TRANS_PERM_LOG_RES)
			xfs_defer_cancel(tp);
		goto out_unreserve;
	}

	/*
	 * Finish deferred items on final commit. Only permanent transactions
	 * should ever have deferred ops.
	 */
	WARN_ON_ONCE(!list_empty(&tp->t_dfops) &&
		     !(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
	if (!regrant && (tp->t_flags & XFS_TRANS_PERM_LOG_RES)) {
		error = xfs_defer_finish_noroll(&tp);
		if (error)
			goto out_unreserve;

		/* Run precommits from final tx in defer chain. */
		error = xfs_trans_run_precommits(tp);
		if (error)
			goto out_unreserve;
	}

	if (!(tp->t_flags & XFS_TRANS_DIRTY))
		goto out_unreserve;

	if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
		sbp = &(tp->t_mountp->m_sb);
		if (tp->t_icount_delta)
			sbp->sb_icount += tp->t_icount_delta;
		if (tp->t_ifree_delta)
			sbp->sb_ifree += tp->t_ifree_delta;
		if (tp->t_fdblocks_delta)
			sbp->sb_fdblocks += tp->t_fdblocks_delta;
		if (tp->t_frextents_delta)
			sbp->sb_frextents += tp->t_frextents_delta;
		xfs_log_sb(tp);
	}

	trans_committed(tp);

	/* That's it for the transaction structure. Free it. */
	xfs_trans_free(tp);
	return 0;

out_unreserve:
	xfs_trans_free_items(tp);
	xfs_trans_free(tp);
	return error;
}

int
libxfs_trans_commit(
	struct xfs_trans	*tp)
{
	return __xfs_trans_commit(tp, false);
}

/*
 * Allocate a transaction, lock and join the inode to it, and reserve quota.
 *
 * The caller must ensure that the on-disk dquots attached to this inode
 * have already been allocated and initialized. The caller is responsible
 * for releasing ILOCK_EXCL if a new transaction is returned.
 */
int
libxfs_trans_alloc_inode(
	struct xfs_inode	*ip,
	struct xfs_trans_res	*resv,
	unsigned int		dblocks,
	unsigned int		rblocks,
	bool			force,
	struct xfs_trans	**tpp)
{
	struct xfs_trans	*tp;
	struct xfs_mount	*mp = ip->i_mount;
	int			error;

	error = libxfs_trans_alloc(mp, resv, dblocks,
			xfs_rtb_to_rtx(mp, rblocks),
			force ? XFS_TRANS_RESERVE : 0, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, 0);

	*tpp = tp;
	return 0;
}

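/*
 * Illustrative sketch (hypothetical caller): the ILOCK taken above is
 * not released at commit (lock_flags of zero were passed to ijoin), so
 * the caller unlocks explicitly afterwards.
 *
 *	error = libxfs_trans_alloc_inode(ip, &M_RES(mp)->tr_ichange, 0, 0,
 *			false, &tp);
 *	if (error)
 *		return error;
 *	... modify and log the inode ...
 *	error = libxfs_trans_commit(tp);
 *	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 */
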
/*
 * Try to reserve more blocks for a transaction. The single use case we
 * support is for offline repair -- use a transaction to gather data without
 * fear of btree cycle deadlocks; calculate how many blocks we really need
 * from that data; and only then start modifying data. This can fail due to
 * ENOSPC, so we have to be able to cancel the transaction.
 */
int
libxfs_trans_reserve_more(
	struct xfs_trans	*tp,
	uint			blocks,
	uint			rtextents)
{
	int			error = 0;

	ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));

	/*
	 * Attempt to reserve the needed disk blocks by decrementing
	 * the number needed from the number available. This will
	 * fail if the count would go below zero.
	 */
	if (blocks > 0) {
		if (tp->t_mountp->m_sb.sb_fdblocks < blocks)
			return -ENOSPC;
		tp->t_blk_res += blocks;
	}

	/*
	 * Attempt to reserve the needed realtime extents by decrementing
	 * the number needed from the number available. This will
	 * fail if the count would go below zero.
	 */
	if (rtextents > 0) {
		if (tp->t_mountp->m_sb.sb_rextents < rtextents) {
			error = -ENOSPC;
			goto out_blocks;
		}
		tp->t_rtx_res += rtextents;
	}

	return 0;
out_blocks:
	if (blocks > 0)
		tp->t_blk_res -= blocks;

	return error;
}
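
/*
 * Illustrative repair-style flow (hypothetical): allocate with a token
 * reservation, scan to size the real requirement, then reserve before
 * dirtying anything.
 *
 *	error = libxfs_trans_alloc_rollable(mp, 0, &tp);
 *	...scan metadata and compute nblocks...
 *	error = libxfs_trans_reserve_more(tp, nblocks, 0);
 *	if (error)
 *		goto out_cancel;
 */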