]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blobdiff - libxfs/trans.c
libxfs: track transaction block reservation usage like the kernel
[thirdparty/xfsprogs-dev.git] / libxfs / trans.c
index 33c24ac81914da65cc101550211ece2b8d466573..e0567d504c0218612ccaa4a9c7947a9678269ca7 100644 (file)
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2000-2001,2005-2006 Silicon Graphics, Inc.
+ * Copyright (C) 2010 Red Hat, Inc.
  * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-#include <xfs.h>
+#include "libxfs_priv.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_inode_buf.h"
+#include "xfs_inode_fork.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+
+static void xfs_trans_free_items(struct xfs_trans *tp);
+STATIC struct xfs_trans *xfs_trans_dup(struct xfs_trans *tp);
+static int xfs_trans_reserve(struct xfs_trans *tp, struct xfs_trans_res *resp,
+               uint blocks, uint rtextents);
+static int __xfs_trans_commit(struct xfs_trans *tp, bool regrant);
 
 /*
  * Simple transaction interface
  */
 
-xfs_trans_t *
-libxfs_trans_alloc(
-       xfs_mount_t     *mp,
-       int             type)
-{
-       xfs_trans_t     *ptr;
+kmem_zone_t    *xfs_trans_zone;
 
-       if ((ptr = calloc(sizeof(xfs_trans_t), 1)) == NULL) {
-               fprintf(stderr, _("%s: xact calloc failed (%d bytes): %s\n"),
-                       progname, (int)sizeof(xfs_trans_t), strerror(errno));
-               exit(1);
-       }
-       ptr->t_mountp = mp;
-       ptr->t_type = type;
-       ptr->t_items_free = XFS_LIC_NUM_SLOTS;
-       XFS_LIC_INIT(&(ptr->t_items));
-#ifdef XACT_DEBUG
-       fprintf(stderr, "allocated new transaction %p\n", ptr);
-#endif
-       return ptr;
+/*
+ * Initialize the precomputed transaction reservation values
+ * in the mount structure.
+ *
+ * This simply calls xfs_trans_resv_calc() with the mount and &mp->m_resv.
+ */
+void
+libxfs_trans_init(
+       struct xfs_mount        *mp)
+{
+       xfs_trans_resv_calc(mp, &mp->m_resv);
 }
 
-xfs_trans_t *
-libxfs_trans_dup(
-       xfs_trans_t     *tp)
+/*
+ * Add the given log item to the transaction's list of log items.
+ */
+void
+libxfs_trans_add_item(
+       struct xfs_trans        *tp,
+       struct xfs_log_item     *lip)
 {
-       xfs_trans_t     *ptr;
+       ASSERT(lip->li_mountp == tp->t_mountp);
+       ASSERT(lip->li_ailp == tp->t_mountp->m_ail);
+       ASSERT(list_empty(&lip->li_trans));
+       ASSERT(!test_bit(XFS_LI_DIRTY, &lip->li_flags));
 
-       ptr = libxfs_trans_alloc(tp->t_mountp, tp->t_type);
-#ifdef XACT_DEBUG
-       fprintf(stderr, "duplicated transaction %p (new=%p)\n", tp, ptr);
-#endif
-       return ptr;
+       list_add_tail(&lip->li_trans, &tp->t_items);
+}
+
+/*
+ * Unlink the given log item from its transaction.
+ *
+ * Clears the XFS_LI_DIRTY bit in lip->li_flags and removes the item from
+ * the li_trans list it is on.  The log item itself is not freed here.
+ */
+void
+libxfs_trans_del_item(
+       struct xfs_log_item     *lip)
+{
+       clear_bit(XFS_LI_DIRTY, &lip->li_flags);
+       list_del_init(&lip->li_trans);
 }
 
+/*
+ * Roll from one trans in the sequence of PERMANENT transactions to
+ * the next: permanent transactions are only flushed out when
+ * committed with XFS_TRANS_RELEASE_LOG_RES, but we still want as soon
+ * as possible to let chunks of it go to the log. So we commit the
+ * chunk we've been working on and get a new transaction to continue.
+ */
 int
-libxfs_trans_reserve(
-       xfs_trans_t     *tp,
-       uint            blocks,
-       uint            logspace,
-       uint            rtextents,
-       uint            flags,
-       uint            logcount)
+libxfs_trans_roll(
+       struct xfs_trans        **tpp)
+{
+       struct xfs_trans        *trans = *tpp;
+       struct xfs_trans_res    tres;
+       int                     error;
+
+       /*
+        * Copy the critical parameters from one trans to the next.
+        */
+       tres.tr_logres = trans->t_log_res;
+       tres.tr_logcount = trans->t_log_count;
+
+       *tpp = xfs_trans_dup(trans);
+
+       /*
+        * Commit the current transaction.
+        * If this commit failed, then it'd just unlock those items that
+        * are marked to be released. That also means that a filesystem shutdown
+        * is in progress. The caller takes the responsibility to cancel
+        * the duplicate transaction that gets returned.
+        */
+       error = __xfs_trans_commit(trans, true);
+       if (error)
+               return error;
+
+       /*
+        * Reserve space in the log for the next transaction.
+        * This also pushes items in the "AIL", the list of logged items,
+        * out to disk if they are taking up space at the tail of the log
+        * that we want to use.  This requires that either nothing be locked
+        * across this call, or that anything that is locked be logged in
+        * the prior and the next transactions.
+        */
+       tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
+       return xfs_trans_reserve(*tpp, &tres, 0, 0);
+}
+
+/*
+ * Free the transaction structure.  If there is more clean up
+ * to do when the structure is freed, add it here.
+ *
+ * The structure is handed back to xfs_trans_zone; nothing else (such as
+ * log items still linked on the transaction) is touched by this function.
+ */
+static void
+xfs_trans_free(
+       struct xfs_trans        *tp)
+{
+       kmem_zone_free(xfs_trans_zone, tp);
+}
+
+/*
+ * This is called to create a new transaction which will share the
+ * permanent log reservation of the given transaction.  The remaining
+ * unused block reservation (t_blk_res - t_blk_res_used) is handed to the
+ * new transaction and the original keeps only what it has already used.
+ * This implies that the original transaction is no longer allowed to
+ * allocate blocks.  Locks and log items, however, are not inherited.  They
+ * must be added to the new transaction explicitly.
+ *
+ * The new transaction always carries XFS_TRANS_PERM_LOG_RES and copies the
+ * XFS_TRANS_RESERVE and XFS_TRANS_NO_WRITECOUNT flags and the t_agfl_dfops
+ * pointer from tp.  tp must itself be a permanent-reservation transaction
+ * (this is asserted), and it is marked XFS_TRANS_NO_WRITECOUNT on return.
+ *
+ * NOTE(review): no rt extent reservation is carried over in this function.
+ */
+STATIC struct xfs_trans *
+xfs_trans_dup(
+       struct xfs_trans        *tp)
+{
+       struct xfs_trans        *ntp;
+
+       ntp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP);
+
+       /*
+        * Initialize the new transaction structure.
+        */
+       ntp->t_mountp = tp->t_mountp;
+       INIT_LIST_HEAD(&ntp->t_items);
+
+       ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
+
+       ntp->t_flags = XFS_TRANS_PERM_LOG_RES |
+                      (tp->t_flags & XFS_TRANS_RESERVE) |
+                      (tp->t_flags & XFS_TRANS_NO_WRITECOUNT);
+       /* We gave our writer reference to the new transaction */
+       tp->t_flags |= XFS_TRANS_NO_WRITECOUNT;
+
+       ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;
+       tp->t_blk_res = tp->t_blk_res_used;
+
+       ntp->t_agfl_dfops = tp->t_agfl_dfops;
+
+       return ntp;
+}
+
+/*
+ * This is called to reserve free disk blocks and log space for the
+ * given transaction.  This must be done before allocating any resources
+ * within the transaction.
+ *
+ * This will return -ENOSPC if there are not enough free blocks or realtime
+ * extents available.  No log space is actually reserved here; the log
+ * reservation size and count from resp are only recorded in the transaction.
+ * The only flag examined in resp->tr_logflags is XFS_TRANS_PERM_LOG_RES, which
+ * is used by long running transactions.  If any one of the reservations
+ * fails then they will all be backed out.
+ *
+ * This does not do quota reservations. That typically is done by the
+ * caller afterwards.
+ */
+static int
+xfs_trans_reserve(
+       struct xfs_trans        *tp,
+       struct xfs_trans_res    *resp,
+       uint                    blocks,
+       uint                    rtextents)
 {
-       xfs_sb_t        *mpsb = &tp->t_mountp->m_sb;
+       int                     error = 0;
 
        /*
         * Attempt to reserve the needed disk blocks by decrementing
-        * the number needed from the number available.  This will
+        * the number needed from the number available.  This will
         * fail if the count would go below zero.
         */
        if (blocks > 0) {
-               if (mpsb->sb_fdblocks < blocks)
-                       return ENOSPC;
+               if (tp->t_mountp->m_sb.sb_fdblocks < blocks)
+                       return -ENOSPC;
+               tp->t_blk_res += blocks;
+       }
+
+       /*
+        * Reserve the log space needed for this transaction.
+        */
+       if (resp->tr_logres > 0) {
+               ASSERT(tp->t_log_res == 0 ||
+                      tp->t_log_res == resp->tr_logres);
+               ASSERT(tp->t_log_count == 0 ||
+                      tp->t_log_count == resp->tr_logcount);
+
+               if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES)
+                       tp->t_flags |= XFS_TRANS_PERM_LOG_RES;
+               else
+                       ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
+
+               tp->t_log_res = resp->tr_logres;
+               tp->t_log_count = resp->tr_logcount;
+       }
+
+       /*
+        * Attempt to reserve the needed realtime extents by decrementing
+        * the number needed from the number available.  This will
+        * fail if the count would go below zero.
+        */
+       if (rtextents > 0) {
+               if (tp->t_mountp->m_sb.sb_rextents < rtextents) {
+                       error = -ENOSPC;
+                       goto undo_blocks;
+               }
+       }
+
+       return 0;
+
+       /*
+        * Error cases jump to one of these labels to undo any
+        * reservations which have already been performed.
+        */
+undo_blocks:
+       if (blocks > 0)
+               tp->t_blk_res = 0;
+
+       return error;
+}
+
+int
+libxfs_trans_alloc(
+       struct xfs_mount        *mp,
+       struct xfs_trans_res    *resp,
+       unsigned int            blocks,
+       unsigned int            rtextents,
+       unsigned int            flags,
+       struct xfs_trans        **tpp)
+
+{
+       struct xfs_trans        *tp;
+       int                     error;
+
+       tp = kmem_zone_zalloc(xfs_trans_zone,
+               (flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP);
+       tp->t_mountp = mp;
+       INIT_LIST_HEAD(&tp->t_items);
+
+       error = xfs_trans_reserve(tp, resp, blocks, rtextents);
+       if (error) {
+               xfs_trans_cancel(tp);
+               return error;
        }
-       /* user space, don't need log/RT stuff (preserve the API though) */
+#ifdef XACT_DEBUG
+       fprintf(stderr, "allocated new transaction %p\n", tp);
+#endif
+       *tpp = tp;
        return 0;
 }
 
+/*
+ * Create an empty transaction with no reservation.  This is a defensive
+ * mechanism for routines that query metadata without actually modifying
+ * them -- if the metadata being queried is somehow cross-linked (think a
+ * btree block pointer that points higher in the tree), we risk deadlock.
+ * However, blocks grabbed as part of a transaction can be re-grabbed.
+ * The verifiers will notice the corrupt block and the operation will fail
+ * back to userspace without deadlocking.
+ *
+ * Note the zero-length reservation; this transaction MUST be cancelled
+ * without any dirty data.
+ *
+ * The transaction is obtained through xfs_trans_alloc() with a zeroed
+ * reservation structure, no blocks, no rt extents and the
+ * XFS_TRANS_NO_WRITECOUNT flag; that call's return value is passed back
+ * unchanged.
+ */
+int
+libxfs_trans_alloc_empty(
+       struct xfs_mount                *mp,
+       struct xfs_trans                **tpp)
+{
+       struct xfs_trans_res            resv = {0};
+
+       return xfs_trans_alloc(mp, &resv, 0, 0, XFS_TRANS_NO_WRITECOUNT, tpp);
+}
+
+/*
+ * Allocate a transaction that can be rolled.  Since userspace doesn't have
+ * a need for log reservations, we really only use tr_itruncate to get the
+ * permanent log reservation flag to avoid blowing asserts.
+ *
+ * blocks is the block reservation to request; no rt extents and no
+ * allocation flags are passed.  Returns whatever libxfs_trans_alloc()
+ * returns; on success that call stores the new transaction in *tpp.
+ */
+int
+libxfs_trans_alloc_rollable(
+       struct xfs_mount        *mp,
+       unsigned int            blocks,
+       struct xfs_trans        **tpp)
+{
+       return libxfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, blocks,
+                       0, 0, tpp);
+}
+
 void
 libxfs_trans_cancel(
-       xfs_trans_t     *tp,
-       int             flags)
+       struct xfs_trans        *tp)
 {
 #ifdef XACT_DEBUG
-       xfs_trans_t     *otp = tp;
+       struct xfs_trans        *otp = tp;
 #endif
-       if (tp != NULL) {
-               xfs_trans_free_items(tp, flags);
-               free(tp);
-               tp = NULL;
-       }
+       if (tp == NULL)
+               goto out;
+
+       xfs_trans_free_items(tp);
+       xfs_trans_free(tp);
+
+out:
 #ifdef XACT_DEBUG
        fprintf(stderr, "## cancelled transaction %p\n", otp);
 #endif
+       return;
 }
 
 int
@@ -113,9 +341,11 @@ libxfs_trans_iget(
        xfs_inode_log_item_t    *iip;
 
        if (tp == NULL)
-               return libxfs_iget(mp, tp, ino, lock_flags, ipp, 0);
+               return libxfs_iget(mp, tp, ino, lock_flags, ipp,
+                               &xfs_default_ifork_ops);
 
-       error = libxfs_iget(mp, tp, ino, lock_flags, &ip, 0);
+       error = libxfs_iget(mp, tp, ino, lock_flags, &ip,
+                       &xfs_default_ifork_ops);
        if (error)
                return error;
        ASSERT(ip != NULL);
@@ -132,33 +362,6 @@ libxfs_trans_iget(
        return 0;
 }
 
-void
-libxfs_trans_iput(
-       xfs_trans_t             *tp,
-       xfs_inode_t             *ip,
-       uint                    lock_flags)
-{
-       xfs_inode_log_item_t    *iip;
-       xfs_log_item_desc_t     *lidp;
-
-       if (tp == NULL) {
-               libxfs_iput(ip, lock_flags);
-               return;
-       }
-
-       ASSERT(ip->i_transp == tp);
-       iip = ip->i_itemp;
-       ASSERT(iip != NULL);
-
-       lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)iip);
-       ASSERT(lidp != NULL);
-       ASSERT(lidp->lid_item == (xfs_log_item_t *)iip);
-       ASSERT(!(lidp->lid_flags & XFS_LID_DIRTY));
-       xfs_trans_free_item(tp, lidp);
-
-       libxfs_iput(ip, lock_flags);
-}
-
 void
 libxfs_trans_ijoin(
        xfs_trans_t             *tp,
@@ -183,16 +386,18 @@ libxfs_trans_ijoin(
 }
 
 void
-libxfs_trans_ihold(
+libxfs_trans_ijoin_ref(
        xfs_trans_t             *tp,
-       xfs_inode_t             *ip)
+       xfs_inode_t             *ip,
+       int                     lock_flags)
 {
        ASSERT(ip->i_transp == tp);
        ASSERT(ip->i_itemp != NULL);
 
-       ip->i_itemp->ili_flags |= XFS_ILI_HOLD;
+       xfs_trans_ijoin(tp, ip, lock_flags);
+
 #ifdef XACT_DEBUG
-       fprintf(stderr, "ihold'd inode %llu, transaction %p\n", ip->i_ino, tp);
+       fprintf(stderr, "ijoin_ref'd inode %llu, transaction %p\n", ip->i_ino, tp);
 #endif
 }
 
@@ -201,12 +406,12 @@ libxfs_trans_inode_alloc_buf(
        xfs_trans_t             *tp,
        xfs_buf_t               *bp)
 {
-       xfs_buf_log_item_t      *bip;
+       xfs_buf_log_item_t      *bip = bp->b_log_item;
 
-       ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
-       ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
-       bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+       ASSERT(bp->bp_transp == tp);
+       ASSERT(bip != NULL);
        bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
+       xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
 }
 
 /*
@@ -214,7 +419,7 @@ libxfs_trans_inode_alloc_buf(
  * to be logged when the transaction is committed.  The inode must
  * already be associated with the given transaction.
  *
- * The values for fieldmask are defined in xfs_inode_item.h.  We always
+ * The values for fieldmask are defined in xfs_log_format.h.  We always
  * log all of the core inode if any of it has changed, and we always log
  * all of the inline data/extents/b-tree root if any of them has changed.
  */
@@ -224,19 +429,14 @@ xfs_trans_log_inode(
        xfs_inode_t             *ip,
        uint                    flags)
 {
-       xfs_log_item_desc_t     *lidp;
-
        ASSERT(ip->i_transp == tp);
        ASSERT(ip->i_itemp != NULL);
 #ifdef XACT_DEBUG
        fprintf(stderr, "dirtied inode %llu, transaction %p\n", ip->i_ino, tp);
 #endif
 
-       lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(ip->i_itemp));
-       ASSERT(lidp != NULL);
-
        tp->t_flags |= XFS_TRANS_DIRTY;
-       lidp->lid_flags |= XFS_LID_DIRTY;
+       set_bit(XFS_LI_DIRTY, &ip->i_itemp->ili_item.li_flags);
 
        /*
         * Always OR in the bits from the ili_last_fields field.
@@ -246,7 +446,42 @@ xfs_trans_log_inode(
         * this coordination mechanism.
         */
        flags |= ip->i_itemp->ili_last_fields;
-       ip->i_itemp->ili_format.ilf_fields |= flags;
+       ip->i_itemp->ili_fields |= flags;
+}
+
+/*
+ * Roll the transaction while keeping an inode attached to it.
+ *
+ * The inode core is logged in the current transaction, the transaction is
+ * rolled, and if the roll succeeded the inode is joined to the new
+ * transaction (passing 0 as the last argument of xfs_trans_ijoin()).
+ * Returns the result of xfs_trans_roll(); on failure the inode is not
+ * rejoined.
+ */
+int
+libxfs_trans_roll_inode(
+       struct xfs_trans        **tpp,
+       struct xfs_inode        *ip)
+{
+       int                     error;
+
+       xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
+       error = xfs_trans_roll(tpp);
+       if (!error)
+               xfs_trans_ijoin(*tpp, ip, 0);
+       return error;
+}
+
+
+/*
+ * Mark a buffer dirty in the transaction.
+ *
+ * The buffer must already be attached to tp (bp->b_transp == tp) and have
+ * a buf log item; both conditions are asserted.  Sets XFS_TRANS_DIRTY on
+ * the transaction and XFS_LI_DIRTY on the buffer's log item.
+ */
+void
+libxfs_trans_dirty_buf(
+       struct xfs_trans        *tp,
+       struct xfs_buf          *bp)
+{
+       struct xfs_buf_log_item *bip = bp->b_log_item;
+
+       ASSERT(bp->b_transp == tp);
+       ASSERT(bip != NULL);
+
+#ifdef XACT_DEBUG
+       fprintf(stderr, "dirtied buffer %p, transaction %p\n", bp, tp);
+#endif
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       set_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags);
 }
 
 /*
@@ -260,29 +495,37 @@ xfs_trans_log_inode(
  */
 void
 libxfs_trans_log_buf(
-       xfs_trans_t             *tp,
-       xfs_buf_t               *bp,
+       struct xfs_trans        *tp,
+       struct xfs_buf          *bp,
        uint                    first,
        uint                    last)
 {
-       xfs_buf_log_item_t      *bip;
-       xfs_log_item_desc_t     *lidp;
+       struct xfs_buf_log_item *bip = bp->b_log_item;
 
-       ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
-       ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
-       ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp)));
-#ifdef XACT_DEBUG
-       fprintf(stderr, "dirtied buffer %p, transaction %p\n", bp, tp);
-#endif
+       ASSERT((first <= last) && (last < bp->b_bcount));
 
-       bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+       xfs_trans_dirty_buf(tp, bp);
+       xfs_buf_item_log(bip, first, last);
+}
 
-       lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)bip);
-       ASSERT(lidp != NULL);
+/*
+ * For userspace, ordered buffers just need to be marked dirty so
+ * the transaction commit will write them and mark them up-to-date.
+ * In essence, they are just like any other logged buffer in userspace.
+ *
+ * If the buffer is already dirty, trigger the "already logged" return condition.
+ *
+ * The whole buffer is logged, i.e. bytes 0 through b_bcount - 1; the last
+ * offset is inclusive and libxfs_trans_log_buf() asserts that it is below
+ * b_bcount.  Returns whether XFS_LI_DIRTY was already set on the buffer's
+ * log item before this call.
+ */
+bool
+libxfs_trans_ordered_buf(
+       struct xfs_trans        *tp,
+       struct xfs_buf          *bp)
+{
+       struct xfs_buf_log_item *bip = bp->b_log_item;
+       bool                    ret;
 
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       lidp->lid_flags |= XFS_LID_DIRTY;
-       xfs_buf_item_log(bip, first, last);
+       ret = test_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags);
+       libxfs_trans_log_buf(tp, bp, 0, bp->b_bcount - 1);
+       return ret;
 }
 
 void
@@ -291,21 +534,18 @@ libxfs_trans_brelse(
        xfs_buf_t               *bp)
 {
        xfs_buf_log_item_t      *bip;
-       xfs_log_item_desc_t     *lidp;
 #ifdef XACT_DEBUG
        fprintf(stderr, "released buffer %p, transaction %p\n", bp, tp);
 #endif
 
        if (tp == NULL) {
-               ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
+               ASSERT(bp->bp_transp == NULL);
                libxfs_putbuf(bp);
                return;
        }
-       ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
-       bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+       ASSERT(bp->bp_transp == tp);
+       bip = bp->b_log_item;
        ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
-       lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
-       ASSERT(lidp != NULL);
        if (bip->bli_recur > 0) {
                bip->bli_recur--;
                return;
@@ -313,12 +553,12 @@ libxfs_trans_brelse(
        /* If dirty/stale, can't release till transaction committed */
        if (bip->bli_flags & XFS_BLI_STALE)
                return;
-       if (lidp->lid_flags & XFS_LID_DIRTY)
+       if (test_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags))
                return;
-       xfs_trans_free_item(tp, lidp);
+       xfs_trans_del_item(&bip->bli_item);
        if (bip->bli_flags & XFS_BLI_HOLD)
                bip->bli_flags &= ~XFS_BLI_HOLD;
-       XFS_BUF_SET_FSPRIVATE2(bp, NULL);
+       bp->b_transp = NULL;
        libxfs_putbuf(bp);
 }
 
@@ -327,27 +567,23 @@ libxfs_trans_binval(
        xfs_trans_t             *tp,
        xfs_buf_t               *bp)
 {
-       xfs_log_item_desc_t     *lidp;
-       xfs_buf_log_item_t      *bip;
+       xfs_buf_log_item_t      *bip = bp->b_log_item;
 #ifdef XACT_DEBUG
        fprintf(stderr, "binval'd buffer %p, transaction %p\n", bp, tp);
 #endif
 
-       ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
-       ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+       ASSERT(bp->bp_transp == tp);
+       ASSERT(bip != NULL);
 
-       bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
-       lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
-       ASSERT(lidp != NULL);
        if (bip->bli_flags & XFS_BLI_STALE)
                return;
        XFS_BUF_UNDELAYWRITE(bp);
-       XFS_BUF_STALE(bp);
+       xfs_buf_stale(bp);
        bip->bli_flags |= XFS_BLI_STALE;
        bip->bli_flags &= ~XFS_BLI_DIRTY;
-       bip->bli_format.blf_flags &= ~XFS_BLI_INODE_BUF;
-       bip->bli_format.blf_flags |= XFS_BLI_CANCEL;
-       lidp->lid_flags |= XFS_LID_DIRTY;
+       bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF;
+       bip->bli_format.blf_flags |= XFS_BLF_CANCEL;
+       set_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags);
        tp->t_flags |= XFS_TRANS_DIRTY;
 }
 
@@ -358,15 +594,15 @@ libxfs_trans_bjoin(
 {
        xfs_buf_log_item_t      *bip;
 
-       ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
+       ASSERT(bp->bp_transp == NULL);
 #ifdef XACT_DEBUG
        fprintf(stderr, "bjoin'd buffer %p, transaction %p\n", bp, tp);
 #endif
 
        xfs_buf_item_init(bp, tp->t_mountp);
-       bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+       bip = bp->b_log_item;
        xfs_trans_add_item(tp, (xfs_log_item_t *)bip);
-       XFS_BUF_SET_FSPRIVATE2(bp, tp);
+       bp->b_transp = tp;
 }
 
 void
@@ -374,47 +610,41 @@ libxfs_trans_bhold(
        xfs_trans_t             *tp,
        xfs_buf_t               *bp)
 {
-       xfs_buf_log_item_t      *bip;
+       xfs_buf_log_item_t      *bip = bp->b_log_item;
 
-       ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
-       ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+       ASSERT(bp->bp_transp == tp);
+       ASSERT(bip != NULL);
 #ifdef XACT_DEBUG
        fprintf(stderr, "bhold'd buffer %p, transaction %p\n", bp, tp);
 #endif
 
-       bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
        bip->bli_flags |= XFS_BLI_HOLD;
 }
 
 xfs_buf_t *
-libxfs_trans_get_buf(
+libxfs_trans_get_buf_map(
        xfs_trans_t             *tp,
-       dev_t                   dev,
-       xfs_daddr_t             d,
-       int                     len,
+       struct xfs_buftarg      *btp,
+       struct xfs_buf_map      *map,
+       int                     nmaps,
        uint                    f)
 {
        xfs_buf_t               *bp;
        xfs_buf_log_item_t      *bip;
-       xfs_buftarg_t           bdev;
 
        if (tp == NULL)
-               return libxfs_getbuf(dev, d, len);
+               return libxfs_getbuf_map(btp, map, nmaps, 0);
 
-       bdev.dev = dev;
-       if (tp->t_items.lic_next == NULL)
-               bp = xfs_trans_buf_item_match(tp, &bdev, d, len);
-       else
-               bp = xfs_trans_buf_item_match_all(tp, &bdev, d, len);
+       bp = xfs_trans_buf_item_match(tp, btp, map, nmaps);
        if (bp != NULL) {
-               ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
-               bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+               ASSERT(bp->bp_transp == tp);
+               bip = bp->b_log_item;
                ASSERT(bip != NULL);
                bip->bli_recur++;
                return bp;
        }
 
-       bp = libxfs_getbuf(dev, d, len);
+       bp = libxfs_getbuf_map(btp, map, nmaps, 0);
        if (bp == NULL)
                return NULL;
 #ifdef XACT_DEBUG
@@ -422,12 +652,12 @@ libxfs_trans_get_buf(
 #endif
 
        xfs_buf_item_init(bp, tp->t_mountp);
-       bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
+       bip = bp->b_log_item;
        bip->bli_recur = 0;
        xfs_trans_add_item(tp, (xfs_log_item_t *)bip);
 
-       /* initialize b_fsprivate2 so we can find it incore */
-       XFS_BUF_SET_FSPRIVATE2(bp, tp);
+       /* initialize b_transp so we can find it incore */
+       bp->b_transp = tp;
        return bp;
 }
 
@@ -439,21 +669,16 @@ libxfs_trans_getsb(
 {
        xfs_buf_t               *bp;
        xfs_buf_log_item_t      *bip;
-       xfs_buftarg_t           bdev;
-       int                     len;
+       int                     len = XFS_FSS_TO_BB(mp, 1);
+       DEFINE_SINGLE_BUF_MAP(map, XFS_SB_DADDR, len);
 
        if (tp == NULL)
                return libxfs_getsb(mp, flags);
 
-       bdev.dev = mp->m_dev;
-       len = XFS_FSS_TO_BB(mp, 1);
-       if (tp->t_items.lic_next == NULL)
-               bp = xfs_trans_buf_item_match(tp, &bdev, XFS_SB_DADDR, len);
-       else
-               bp = xfs_trans_buf_item_match_all(tp, &bdev, XFS_SB_DADDR, len);
+       bp = xfs_trans_buf_item_match(tp, mp->m_dev, &map, 1);
        if (bp != NULL) {
-               ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
-               bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+               ASSERT(bp->bp_transp == tp);
+               bip = bp->b_log_item;
                ASSERT(bip != NULL);
                bip->bli_recur++;
                return bp;
@@ -465,62 +690,76 @@ libxfs_trans_getsb(
 #endif
 
        xfs_buf_item_init(bp, mp);
-       bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
+       bip = bp->b_log_item;
        bip->bli_recur = 0;
        xfs_trans_add_item(tp, (xfs_log_item_t *)bip);
 
-       /* initialize b_fsprivate2 so we can find it incore */
-       XFS_BUF_SET_FSPRIVATE2(bp, tp);
+       /* initialize b_transp so we can find it incore */
+       bp->b_transp = tp;
        return bp;
 }
 
 int
-libxfs_trans_read_buf(
+libxfs_trans_read_buf_map(
        xfs_mount_t             *mp,
        xfs_trans_t             *tp,
-       dev_t                   dev,
-       xfs_daddr_t             blkno,
-       int                     len,
+       struct xfs_buftarg      *btp,
+       struct xfs_buf_map      *map,
+       int                     nmaps,
        uint                    flags,
-       xfs_buf_t               **bpp)
+       xfs_buf_t               **bpp,
+       const struct xfs_buf_ops *ops)
 {
        xfs_buf_t               *bp;
        xfs_buf_log_item_t      *bip;
-       xfs_buftarg_t           bdev;
+       int                     error;
+
+       *bpp = NULL;
 
        if (tp == NULL) {
-               *bpp = libxfs_readbuf(dev, blkno, len, flags);
-               return 0;
+               bp = libxfs_readbuf_map(btp, map, nmaps, flags, ops);
+               if (!bp) {
+                       return (flags & XBF_TRYLOCK) ?  -EAGAIN : -ENOMEM;
+               }
+               if (bp->b_error)
+                       goto out_relse;
+               goto done;
        }
 
-       bdev.dev = dev;
-       if (tp->t_items.lic_next == NULL)
-               bp = xfs_trans_buf_item_match(tp, &bdev, blkno, len);
-       else
-               bp = xfs_trans_buf_item_match_all(tp, &bdev, blkno, len);
+       bp = xfs_trans_buf_item_match(tp, btp, map, nmaps);
        if (bp != NULL) {
-               ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
-               ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
-               bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
+               ASSERT(bp->bp_transp == tp);
+               ASSERT(bp->b_log_item != NULL);
+               bip = bp->b_log_item;
                bip->bli_recur++;
-               *bpp = bp;
-               return 0;
+               goto done;
        }
 
-       bp = libxfs_readbuf(dev, blkno, len, flags);
+       bp = libxfs_readbuf_map(btp, map, nmaps, flags, ops);
+       if (!bp) {
+               return (flags & XBF_TRYLOCK) ?  -EAGAIN : -ENOMEM;
+       }
+       if (bp->b_error)
+               goto out_relse;
+
 #ifdef XACT_DEBUG
        fprintf(stderr, "trans_read_buf buffer %p, transaction %p\n", bp, tp);
 #endif
 
        xfs_buf_item_init(bp, tp->t_mountp);
-       bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+       bip = bp->b_log_item;
        bip->bli_recur = 0;
        xfs_trans_add_item(tp, (xfs_log_item_t *)bip);
 
-       /* initialise b_fsprivate2 so we can find it incore */
-       XFS_BUF_SET_FSPRIVATE2(bp, tp);
+       /* initialise b_transp so we can find it incore */
+       bp->b_transp = tp;
+done:
        *bpp = bp;
        return 0;
+out_relse:
+       error = bp->b_error;
+       xfs_buf_relse(bp);
+       return error;
 }
 
 /*
@@ -542,6 +781,15 @@ libxfs_trans_mod_sb(
        case XFS_TRANS_SB_RES_FDBLOCKS:
                return;
        case XFS_TRANS_SB_FDBLOCKS:
+               if (delta < 0) {
+                       tp->t_blk_res_used += (uint)-delta;
+                       if (tp->t_blk_res_used > tp->t_blk_res) {
+                               fprintf(stderr,
+_("Transaction block reservation exceeded! %u > %u\n"),
+                                       tp->t_blk_res_used, tp->t_blk_res);
+                               ASSERT(0);
+                       }
+               }
                tp->t_fdblocks_delta += delta;
                break;
        case XFS_TRANS_SB_ICOUNT:
@@ -566,7 +814,7 @@ libxfs_trans_mod_sb(
  * Transaction commital code follows (i.e. write to disk in libxfs)
  */
 
-STATIC void
+static void
 inode_item_done(
        xfs_inode_log_item_t    *iip)
 {
@@ -574,116 +822,88 @@ inode_item_done(
        xfs_inode_t             *ip;
        xfs_mount_t             *mp;
        xfs_buf_t               *bp;
-       int                     hold;
        int                     error;
-       extern xfs_zone_t       *xfs_ili_zone;
 
        ip = iip->ili_inode;
        mp = iip->ili_item.li_mountp;
-       hold = iip->ili_flags & XFS_ILI_HOLD;
        ASSERT(ip != NULL);
 
-       if (!(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) {
+       if (!(iip->ili_fields & XFS_ILOG_ALL)) {
                ip->i_transp = NULL;    /* disassociate from transaction */
                iip->ili_flags = 0;     /* reset all flags */
-               if (!hold)
-                       goto ili_done;
                return;
        }
 
        /*
         * Get the buffer containing the on-disk inode.
         */
-       error = libxfs_itobp(mp, NULL, ip, &dip, &bp, 0);
+       error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, 0, 0);
        if (error) {
-               fprintf(stderr, _("%s: warning - itobp failed (%d)\n"),
+               fprintf(stderr, _("%s: warning - imap_to_bp failed (%d)\n"),
                        progname, error);
-               goto ili_done;
+               return;
        }
 
-       XFS_BUF_SET_FSPRIVATE(bp, iip);
+       bp->b_log_item = iip;
        error = libxfs_iflush_int(ip, bp);
        if (error) {
                fprintf(stderr, _("%s: warning - iflush_int failed (%d)\n"),
                        progname, error);
-               goto ili_done;
+               return;
        }
 
        ip->i_transp = NULL;    /* disassociate from transaction */
-       XFS_BUF_SET_FSPRIVATE(bp, NULL);        /* remove log item */
-       XFS_BUF_SET_FSPRIVATE2(bp, NULL);       /* remove xact ptr */
+       bp->b_log_item = NULL;  /* remove log item */
+       bp->b_transp = NULL;    /* remove xact ptr */
        libxfs_writebuf(bp, 0);
 #ifdef XACT_DEBUG
-       fprintf(stderr, "flushing dirty inode %llu, buffer %p (hold=%u)\n",
-                       ip->i_ino, bp, hold);
+       fprintf(stderr, "flushing dirty inode %llu, buffer %p\n",
+                       ip->i_ino, bp);
 #endif
-       if (hold) {
-               iip->ili_flags &= ~XFS_ILI_HOLD;
-               return;
-       }
-       else {
-               libxfs_iput(iip->ili_inode, 0);
-       }
-
-ili_done:
-       if (ip->i_itemp)
-               kmem_zone_free(xfs_ili_zone, ip->i_itemp);
-       else
-               ASSERT(0);
-       ip->i_itemp = NULL;
 }
 
-STATIC void
+static void
 buf_item_done(
        xfs_buf_log_item_t      *bip)
 {
        xfs_buf_t               *bp;
        int                     hold;
-       extern xfs_zone_t       *xfs_buf_item_zone;
+       extern kmem_zone_t      *xfs_buf_item_zone;
 
        bp = bip->bli_buf;
        ASSERT(bp != NULL);
-       XFS_BUF_SET_FSPRIVATE(bp, NULL);        /* remove log item */
-       XFS_BUF_SET_FSPRIVATE2(bp, NULL);       /* remove xact ptr */
+       bp->b_log_item = NULL;                  /* remove log item */
+       bp->b_transp = NULL;                    /* remove xact ptr */
 
        hold = (bip->bli_flags & XFS_BLI_HOLD);
-       if (bip->bli_flags & (XFS_BLI_DIRTY|XFS_BLI_STALE)) {
+       if (bip->bli_flags & XFS_BLI_DIRTY) {
 #ifdef XACT_DEBUG
                fprintf(stderr, "flushing/staling buffer %p (hold=%d)\n",
                        bp, hold);
 #endif
-               if (bip->bli_flags & XFS_BLI_DIRTY)
-                       libxfs_writebuf_int(bp, 0);
-               if (hold)
-                       bip->bli_flags &= ~XFS_BLI_HOLD;
-               else
-                       libxfs_putbuf(bp);
+               libxfs_writebuf_int(bp, 0);
        }
+       if (hold)
+               bip->bli_flags &= ~XFS_BLI_HOLD;
+       else
+               libxfs_putbuf(bp);
        /* release the buf item */
        kmem_zone_free(xfs_buf_item_zone, bip);
 }
 
-/*
- * This is called to perform the commit processing for each
- * item described by the given chunk.
- */
 static void
-trans_chunk_committed(
-       xfs_log_item_chunk_t    *licp)
-{
-       xfs_log_item_desc_t     *lidp;
-       xfs_log_item_t          *lip;
-       int                     i;
-
-       lidp = licp->lic_descs;
-       for (i = 0; i < licp->lic_unused; i++, lidp++) {
-               if (XFS_LIC_ISFREE(licp, i))
-                       continue;
-               lip = lidp->lid_item;
+trans_committed(
+       xfs_trans_t             *tp)
+{
+       struct xfs_log_item     *lip, *next;
+
+       list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
+               xfs_trans_del_item(lip);
+
                if (lip->li_type == XFS_LI_BUF)
-                       buf_item_done((xfs_buf_log_item_t *)lidp->lid_item);
+                       buf_item_done((xfs_buf_log_item_t *)lip);
                else if (lip->li_type == XFS_LI_INODE)
-                       inode_item_done((xfs_inode_log_item_t *)lidp->lid_item);
+                       inode_item_done((xfs_inode_log_item_t *)lip);
                else {
                        fprintf(stderr, _("%s: unrecognised log item type\n"),
                                progname);
@@ -692,126 +912,77 @@ trans_chunk_committed(
        }
 }
 
-/*
- * Calls trans_chunk_committed() to process the items in each chunk.
- */
 static void
-trans_committed(
-       xfs_trans_t             *tp)
+buf_item_unlock(
+       xfs_buf_log_item_t      *bip)
 {
-       xfs_log_item_chunk_t    *licp;
-       xfs_log_item_chunk_t    *next_licp;
+       xfs_buf_t               *bp = bip->bli_buf;
+       uint                    hold;
 
-       /*
-        * Special case the chunk embedded in the transaction.
-        */
-       licp = &(tp->t_items);
-       if (!(XFS_LIC_ARE_ALL_FREE(licp))) {
-               trans_chunk_committed(licp);
-       }
+       /* Clear the buffer's association with this transaction. */
+       bip->bli_buf->b_transp = NULL;
 
-       /*
-        * Process the items in each chunk in turn.
-        */
-       licp = licp->lic_next;
-       while (licp != NULL) {
-               trans_chunk_committed(licp);
-               next_licp = licp->lic_next;
-               kmem_free(licp, sizeof(xfs_log_item_chunk_t));
-               licp = next_licp;
-       }
+       hold = bip->bli_flags & XFS_BLI_HOLD;
+       bip->bli_flags &= ~XFS_BLI_HOLD;
+       if (!hold)
+               libxfs_putbuf(bp);
 }
 
-/*
- * Unlock each item pointed to by a descriptor in the given chunk.
- * Free descriptors pointing to items which are not dirty if freeing_chunk
- * is zero. If freeing_chunk is non-zero, then we need to unlock all
- * items in the chunk. Return the number of descriptors freed.
- * Originally based on xfs_trans_unlock_chunk() - adapted for libxfs
- * transactions though.
- */
-int
-xfs_trans_unlock_chunk(
-       xfs_log_item_chunk_t    *licp,
-       int                     freeing_chunk,
-       int                     abort,
-       xfs_lsn_t               commit_lsn)     /* nb: unused */
-{
-       xfs_log_item_desc_t     *lidp;
-       xfs_log_item_t          *lip;
-       int                     i;
-       int                     freed;
-
-       freed = 0;
-       lidp = licp->lic_descs;
-       for (i = 0; i < licp->lic_unused; i++, lidp++) {
-               if (XFS_LIC_ISFREE(licp, i)) {
-                       continue;
-               }
-               lip = lidp->lid_item;
-               lip->li_desc = NULL;
-
-               /*
-                * Disassociate the logged item from this transaction
-                */
-               if (lip->li_type == XFS_LI_BUF) {
-                       xfs_buf_log_item_t      *bip;
-
-                       bip = (xfs_buf_log_item_t *)lidp->lid_item;
-                       XFS_BUF_SET_FSPRIVATE2(bip->bli_buf, NULL);
-                       bip->bli_flags &= ~XFS_BLI_HOLD;
-               }
-               else if (lip->li_type == XFS_LI_INODE) {
-                       xfs_inode_log_item_t    *iip;
+static void
+inode_item_unlock(
+       xfs_inode_log_item_t    *iip)
+{
+       xfs_inode_t             *ip = iip->ili_inode;
 
-                       iip = (xfs_inode_log_item_t*)lidp->lid_item;
-                       iip->ili_inode->i_transp = NULL;
-                       iip->ili_flags &= ~XFS_ILI_HOLD;
-               }
+       /* Clear the transaction pointer in the inode. */
+       ip->i_transp = NULL;
+
+       iip->ili_flags = 0;
+}
+
+/* Detach and unlock all of the items in a transaction */
+static void
+xfs_trans_free_items(
+       struct xfs_trans        *tp)
+{
+       struct xfs_log_item     *lip, *next;
+
+       list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
+               xfs_trans_del_item(lip);
+               if (lip->li_type == XFS_LI_BUF)
+                       buf_item_unlock((xfs_buf_log_item_t *)lip);
+               else if (lip->li_type == XFS_LI_INODE)
+                       inode_item_unlock((xfs_inode_log_item_t *)lip);
                else {
                        fprintf(stderr, _("%s: unrecognised log item type\n"),
                                progname);
                        ASSERT(0);
                }
-
-               /*
-                * Free the descriptor if the item is not dirty
-                * within this transaction and the caller is not
-                * going to just free the entire thing regardless.
-                */
-               if (!(freeing_chunk) &&
-                   (!(lidp->lid_flags & XFS_LID_DIRTY) || abort)) {
-                       XFS_LIC_RELSE(licp, i);
-                       freed++;
-               }
        }
-
-       return (freed);
 }
 
-
 /*
  * Commit the changes represented by this transaction
  */
-int
-libxfs_trans_commit(
-       xfs_trans_t     *tp,
-       uint            flags,
-       xfs_lsn_t       *commit_lsn_p)
+static int
+__xfs_trans_commit(
+       struct xfs_trans        *tp,
+       bool                    regrant)
 {
-       xfs_sb_t        *sbp;
+       struct xfs_sb           *sbp;
+       int                     error = 0;
 
        if (tp == NULL)
                return 0;
 
+       ASSERT(!tp->t_agfl_dfops ||
+              !xfs_defer_has_unfinished_work(tp->t_agfl_dfops) || regrant);
+
        if (!(tp->t_flags & XFS_TRANS_DIRTY)) {
 #ifdef XACT_DEBUG
                fprintf(stderr, "committed clean transaction %p\n", tp);
 #endif
-               xfs_trans_free_items(tp, flags);
-               free(tp);
-               tp = NULL;
-               return 0;
+               goto out_unreserve;
        }
 
        if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
@@ -824,7 +995,7 @@ libxfs_trans_commit(
                        sbp->sb_fdblocks += tp->t_fdblocks_delta;
                if (tp->t_frextents_delta)
                        sbp->sb_frextents += tp->t_frextents_delta;
-               libxfs_mod_sb(tp, XFS_SB_ALL_BITS);
+               xfs_log_sb(tp);
        }
 
 #ifdef XACT_DEBUG
@@ -833,7 +1004,18 @@ libxfs_trans_commit(
        trans_committed(tp);
 
        /* That's it for the transaction structure.  Free it. */
-       free(tp);
-       tp = NULL;
+       xfs_trans_free(tp);
        return 0;
+
+out_unreserve:
+       xfs_trans_free_items(tp);
+       xfs_trans_free(tp);
+       return error;
+}
+
+int
+libxfs_trans_commit(
+       struct xfs_trans        *tp)
+{
+       return __xfs_trans_commit(tp, false);
 }