xfs: l_last_sync_lsn is really AIL state

author Dave Chinner <dchinner@redhat.com>

Thu, 20 Jun 2024 07:21:23 +0000 (09:21 +0200)

committer Chandan Babu R <chandanbabu@kernel.org>

Thu, 4 Jul 2024 07:16:46 +0000 (12:46 +0530)
author Dave Chinner <dchinner@redhat.com>
Thu, 20 Jun 2024 07:21:23 +0000 (09:21 +0200)
committer Chandan Babu R <chandanbabu@kernel.org>
Thu, 4 Jul 2024 07:16:46 +0000 (12:46 +0530)
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c

index ae22f361627fe43476d65a8c8266abf6fb40ba47..1977afecd385d58c6d27734becb279fd61db1031 100644 (file)
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1230,47 +1230,6 @@ xfs_log_cover(
         return error;
  }
  
-/*
- * We may be holding the log iclog lock upon entering this routine.
- */
-xfs_lsn_t
-xlog_assign_tail_lsn_locked(
-       struct xfs_mount        *mp)
-{
-       struct xlog             *log = mp->m_log;
-       struct xfs_log_item     *lip;
-       xfs_lsn_t               tail_lsn;
-
-       assert_spin_locked(&mp->m_ail->ail_lock);
-
-       /*
-        * To make sure we always have a valid LSN for the log tail we keep
-        * track of the last LSN which was committed in log->l_last_sync_lsn,
-        * and use that when the AIL was empty.
-        */
-       lip = xfs_ail_min(mp->m_ail);
-       if (lip)
-               tail_lsn = lip->li_lsn;
-       else
-               tail_lsn = atomic64_read(&log->l_last_sync_lsn);
-       trace_xfs_log_assign_tail_lsn(log, tail_lsn);
-       atomic64_set(&log->l_tail_lsn, tail_lsn);
-       return tail_lsn;
-}
-
-xfs_lsn_t
-xlog_assign_tail_lsn(
-       struct xfs_mount        *mp)
-{
-       xfs_lsn_t               tail_lsn;
-
-       spin_lock(&mp->m_ail->ail_lock);
-       tail_lsn = xlog_assign_tail_lsn_locked(mp);
-       spin_unlock(&mp->m_ail->ail_lock);
-
-       return tail_lsn;
-}
-
  /*
   * Return the space in the log between the tail and the head.  The head
   * is passed in the cycle/bytes formal parms.  In the special case where
@@ -1501,7 +1460,6 @@ xlog_alloc_log(
         log->l_prev_block  = -1;
         /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */
         xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0);
-       xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0);
         log->l_curr_cycle  = 1;     /* 0 is bad since this is initial value */
  
         if (xfs_has_logv2(mp) && mp->m_sb.sb_logsunit > 1)
@@ -2549,44 +2507,23 @@ xlog_get_lowest_lsn(
         return lowest_lsn;
  }
  
-/*
- * Completion of a iclog IO does not imply that a transaction has completed, as
- * transactions can be large enough to span many iclogs. We cannot change the
- * tail of the log half way through a transaction as this may be the only
- * transaction in the log and moving the tail to point to the middle of it
- * will prevent recovery from finding the start of the transaction. Hence we
- * should only update the last_sync_lsn if this iclog contains transaction
- * completion callbacks on it.
- *
- * We have to do this before we drop the icloglock to ensure we are the only one
- * that can update it.
- *
- * If we are moving the last_sync_lsn forwards, we also need to ensure we kick
- * the reservation grant head pushing. This is due to the fact that the push
- * target is bound by the current last_sync_lsn value. Hence if we have a large
- * amount of log space bound up in this committing transaction then the
- * last_sync_lsn value may be the limiting factor preventing tail pushing from
- * freeing space in the log. Hence once we've updated the last_sync_lsn we
- * should push the AIL to ensure the push target (and hence the grant head) is
- * no longer bound by the old log head location and can move forwards and make
- * progress again.
- */
  static void
  xlog_state_set_callback(
         struct xlog             *log,
         struct xlog_in_core     *iclog,
         xfs_lsn_t               header_lsn)
  {
+       /*
+        * If there are no callbacks on this iclog, we can mark it clean
+        * immediately and return. Otherwise we need to run the
+        * callbacks.
+        */
+       if (list_empty(&iclog->ic_callbacks)) {
+               xlog_state_clean_iclog(log, iclog);
+               return;
+       }
         trace_xlog_iclog_callback(iclog, _RET_IP_);
         iclog->ic_state = XLOG_STATE_CALLBACK;
-
-       ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
-                          header_lsn) <= 0);
-
-       if (list_empty_careful(&iclog->ic_callbacks))
-               return;
-
-       atomic64_set(&log->l_last_sync_lsn, header_lsn);
  }
  
  /*
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c

index 141bde08bd6e3c30c8822148881719f5614ef161..482955f1fa1f9f5c5cd2663854f8a81c4fcc41b5 100644 (file)
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -721,6 +721,24 @@ xlog_cil_ail_insert_batch(
   * items into the AIL. This uses bulk insertion techniques to minimise AIL lock
   * traffic.
   *
+ * The AIL tracks log items via the start record LSN of the checkpoint,
+ * not the commit record LSN. This is because we can pipeline multiple
+ * checkpoints, and so the start record of checkpoint N+1 can be
+ * written before the commit record of checkpoint N. i.e:
+ *
+ *   start N                   commit N
+ *     +-------------+------------+----------------+
+ *               start N+1                     commit N+1
+ *
+ * The tail of the log cannot be moved to the LSN of commit N when all
+ * the items of that checkpoint are written back, because then the
+ * start record for N+1 is no longer in the active portion of the log
+ * and recovery will fail/corrupt the filesystem.
+ *
+ * Hence when all the log items in checkpoint N are written back, the
+ * tail of the log most now only move as far forwards as the start LSN
+ * of checkpoint N+1.
+ *
   * If we are called with the aborted flag set, it is because a log write during
   * a CIL checkpoint commit has failed. In this case, all the items in the
   * checkpoint have already gone through iop_committed and iop_committing, which
@@ -738,24 +756,33 @@ xlog_cil_ail_insert_batch(
   */
  static void
  xlog_cil_ail_insert(
-       struct xlog             *log,
-       struct list_head        *lv_chain,
-       xfs_lsn_t               commit_lsn,
+       struct xfs_cil_ctx      *ctx,
         bool                    aborted)
  {
  #define LOG_ITEM_BATCH_SIZE    32
-       struct xfs_ail          *ailp = log->l_ailp;
+       struct xfs_ail          *ailp = ctx->cil->xc_log->l_ailp;
         struct xfs_log_item     *log_items[LOG_ITEM_BATCH_SIZE];
         struct xfs_log_vec      *lv;
         struct xfs_ail_cursor   cur;
         int                     i = 0;
  
+       /*
+        * Update the AIL head LSN with the commit record LSN of this
+        * checkpoint. As iclogs are always completed in order, this should
+        * always be the same (as iclogs can contain multiple commit records) or
+        * higher LSN than the current head. We do this before insertion of the
+        * items so that log space checks during insertion will reflect the
+        * space that this checkpoint has already consumed.
+        */
+       ASSERT(XFS_LSN_CMP(ctx->commit_lsn, ailp->ail_head_lsn) >= 0 ||
+                       aborted);
         spin_lock(&ailp->ail_lock);
-       xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
+       ailp->ail_head_lsn = ctx->commit_lsn;
+       xfs_trans_ail_cursor_last(ailp, &cur, ctx->start_lsn);
         spin_unlock(&ailp->ail_lock);
  
         /* unpin all the log items */
-       list_for_each_entry(lv, lv_chain, lv_list) {
+       list_for_each_entry(lv, &ctx->lv_chain, lv_list) {
                 struct xfs_log_item     *lip = lv->lv_item;
                 xfs_lsn_t               item_lsn;
  
@@ -768,9 +795,10 @@ xlog_cil_ail_insert(
                 }
  
                 if (lip->li_ops->iop_committed)
-                       item_lsn = lip->li_ops->iop_committed(lip, commit_lsn);
+                       item_lsn = lip->li_ops->iop_committed(lip,
+                                       ctx->start_lsn);
                 else
-                       item_lsn = commit_lsn;
+                       item_lsn = ctx->start_lsn;
  
                 /* item_lsn of -1 means the item needs no further processing */
                 if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
@@ -787,7 +815,7 @@ xlog_cil_ail_insert(
                         continue;
                 }
  
-               if (item_lsn != commit_lsn) {
+               if (item_lsn != ctx->start_lsn) {
  
                         /*
                          * Not a bulk update option due to unusual item_lsn.
@@ -810,14 +838,15 @@ xlog_cil_ail_insert(
                 log_items[i++] = lv->lv_item;
                 if (i >= LOG_ITEM_BATCH_SIZE) {
                         xlog_cil_ail_insert_batch(ailp, &cur, log_items,
-                                       LOG_ITEM_BATCH_SIZE, commit_lsn);
+                                       LOG_ITEM_BATCH_SIZE, ctx->start_lsn);
                         i = 0;
                 }
         }
  
         /* make sure we insert the remainder! */
         if (i)
-               xlog_cil_ail_insert_batch(ailp, &cur, log_items, i, commit_lsn);
+               xlog_cil_ail_insert_batch(ailp, &cur, log_items, i,
+                               ctx->start_lsn);
  
         spin_lock(&ailp->ail_lock);
         xfs_trans_ail_cursor_done(&cur);
@@ -863,8 +892,7 @@ xlog_cil_committed(
                 spin_unlock(&ctx->cil->xc_push_lock);
         }
  
-       xlog_cil_ail_insert(ctx->cil->xc_log, &ctx->lv_chain,
-                                       ctx->start_lsn, abort);
+       xlog_cil_ail_insert(ctx, abort);
  
         xfs_extent_busy_sort(&ctx->busy_extents.extent_list);
         xfs_extent_busy_clear(mp, &ctx->busy_extents.extent_list,
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h

index 971871b84d8436906736c1fb105795c90024d714..4b8ef926044599cd25a7792520bdbe7c34e69475 100644 (file)
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -431,13 +431,10 @@ struct xlog {
         int                     l_prev_block;   /* previous logical log block */
  
         /*
-        * l_last_sync_lsn and l_tail_lsn are atomics so they can be set and
-        * read without needing to hold specific locks. To avoid operations
-        * contending with other hot objects, place each of them on a separate
-        * cacheline.
+        * l_tail_lsn is atomic so it can be set and read without needing to
+        * hold specific locks. To avoid operations contending with other hot
+        * objects, it on a separate cacheline.
          */
-       /* lsn of last LR on disk */
-       atomic64_t              l_last_sync_lsn ____cacheline_aligned_in_smp;
         /* lsn of 1st LR with unflushed * buffers */
         atomic64_t              l_tail_lsn ____cacheline_aligned_in_smp;
  
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c

index 409b645ce7995ee3a28f9b01db2de318a65a603a..0d4563bd129e1f845eacd2bb58551582cb159166 100644 (file)
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1177,8 +1177,8 @@ xlog_check_unmount_rec(
                          */
                         xlog_assign_atomic_lsn(&log->l_tail_lsn,
                                         log->l_curr_cycle, after_umount_blk);
-                       xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
-                                       log->l_curr_cycle, after_umount_blk);
+                       log->l_ailp->ail_head_lsn =
+                                       atomic64_read(&log->l_tail_lsn);
                         *tail_blk = after_umount_blk;
  
                         *clean = true;
@@ -1212,7 +1212,7 @@ xlog_set_state(
         if (bump_cycle)
                 log->l_curr_cycle++;
         atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
-       atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
+       log->l_ailp->ail_head_lsn = be64_to_cpu(rhead->h_lsn);
         xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
                                         BBTOB(log->l_curr_block));
         xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
@@ -3366,14 +3366,13 @@ xlog_do_recover(
  
         /*
          * We now update the tail_lsn since much of the recovery has completed
-        * and there may be space available to use.  If there were no extent
-        * or iunlinks, we can free up the entire log and set the tail_lsn to
-        * be the last_sync_lsn.  This was set in xlog_find_tail to be the
-        * lsn of the last known good LR on disk.  If there are extent frees
-        * or iunlinks they will have some entries in the AIL; so we look at
-        * the AIL to determine how to set the tail_lsn.
+        * and there may be space available to use.  If there were no extent or
+        * iunlinks, we can free up the entire log.  This was set in
+        * xlog_find_tail to be the lsn of the last known good LR on disk.  If
+        * there are extent frees or iunlinks they will have some entries in the
+        * AIL; so we look at the AIL to determine how to set the tail_lsn.
          */
-       xlog_assign_tail_lsn(mp);
+       xfs_ail_assign_tail_lsn(log->l_ailp);
  
         /*
          * Now that we've finished replaying all buffer and inode updates,
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c

index f98fb86ff8d741f44135fab254c4f398497a4c88..2af9f274e8724e9ab081168e5b445e9c80e0f131 100644 (file)
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -22,6 +22,7 @@
  #include "xfs_trans.h"
  #include "xfs_log.h"
  #include "xfs_log_priv.h"
+#include "xfs_trans_priv.h"
  #include "xfs_buf_item.h"
  #include "xfs_quota.h"
  #include "xfs_dquot_item.h"
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h

index 56c8333a470bbd2692b949132e73ccac39a7d15b..16e0635177ac8a77658a2cc6e54969e1fa0a3ca1 100644 (file)
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1407,19 +1407,19 @@ TRACE_EVENT(xfs_log_assign_tail_lsn,
                 __field(dev_t, dev)
                 __field(xfs_lsn_t, new_lsn)
                 __field(xfs_lsn_t, old_lsn)
-               __field(xfs_lsn_t, last_sync_lsn)
+               __field(xfs_lsn_t, head_lsn)
         ),
         TP_fast_assign(
                 __entry->dev = log->l_mp->m_super->s_dev;
                 __entry->new_lsn = new_lsn;
                 __entry->old_lsn = atomic64_read(&log->l_tail_lsn);
-               __entry->last_sync_lsn = atomic64_read(&log->l_last_sync_lsn);
+               __entry->head_lsn = log->l_ailp->ail_head_lsn;
         ),
-       TP_printk("dev %d:%d new tail lsn %d/%d, old lsn %d/%d, last sync %d/%d",
+       TP_printk("dev %d:%d new tail lsn %d/%d, old lsn %d/%d, head lsn %d/%d",
                   MAJOR(__entry->dev), MINOR(__entry->dev),
                   CYCLE_LSN(__entry->new_lsn), BLOCK_LSN(__entry->new_lsn),
                   CYCLE_LSN(__entry->old_lsn), BLOCK_LSN(__entry->old_lsn),
-                 CYCLE_LSN(__entry->last_sync_lsn), BLOCK_LSN(__entry->last_sync_lsn))
+                 CYCLE_LSN(__entry->head_lsn), BLOCK_LSN(__entry->head_lsn))
  )
  
  DECLARE_EVENT_CLASS(xfs_file_class,
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c

index 7d6ccd21aae2e53aa2b1aeca7ccc7a9bef505cd4..5f03f82c46838e79948099b8f191c4fcc3e70d56 100644 (file)
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -720,6 +720,26 @@ xfs_ail_push_all_sync(
         finish_wait(&ailp->ail_empty, &wait);
  }
  
+void
+__xfs_ail_assign_tail_lsn(
+       struct xfs_ail          *ailp)
+{
+       struct xlog             *log = ailp->ail_log;
+       xfs_lsn_t               tail_lsn;
+
+       assert_spin_locked(&ailp->ail_lock);
+
+       if (xlog_is_shutdown(log))
+               return;
+
+       tail_lsn = __xfs_ail_min_lsn(ailp);
+       if (!tail_lsn)
+               tail_lsn = ailp->ail_head_lsn;
+
+       trace_xfs_log_assign_tail_lsn(log, tail_lsn);
+       atomic64_set(&log->l_tail_lsn, tail_lsn);
+}
+
  /*
   * Callers should pass the original tail lsn so that we can detect if the tail
   * has moved as a result of the operation that was performed. If the caller
@@ -734,15 +754,13 @@ xfs_ail_update_finish(
  {
         struct xlog             *log = ailp->ail_log;
  
-       /* if the tail lsn hasn't changed, don't do updates or wakeups. */
+       /* If the tail lsn hasn't changed, don't do updates or wakeups. */
         if (!old_lsn || old_lsn == __xfs_ail_min_lsn(ailp)) {
                 spin_unlock(&ailp->ail_lock);
                 return;
         }
  
-       if (!xlog_is_shutdown(log))
-               xlog_assign_tail_lsn_locked(log->l_mp);
-
+       __xfs_ail_assign_tail_lsn(ailp);
         if (list_empty(&ailp->ail_head))
                 wake_up_all(&ailp->ail_empty);
         spin_unlock(&ailp->ail_lock);
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h

index 60b4707c3a6583f469dfec912d39a3318bad285f..bd841df93021ffe0aacb747449aa9131d3c4596a 100644 (file)
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -55,6 +55,7 @@ struct xfs_ail {
         struct list_head        ail_cursors;
         spinlock_t              ail_lock;
         xfs_lsn_t               ail_last_pushed_lsn;
+       xfs_lsn_t               ail_head_lsn;
         int                     ail_log_flush;
         unsigned long           ail_opstate;
         struct list_head        ail_buf_list;
@@ -130,6 +131,18 @@ struct xfs_log_item *      xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
                                         struct xfs_ail_cursor *cur);
  void                   xfs_trans_ail_cursor_done(struct xfs_ail_cursor *cur);
  
+void                   __xfs_ail_assign_tail_lsn(struct xfs_ail *ailp);
+
+static inline void
+xfs_ail_assign_tail_lsn(
+       struct xfs_ail          *ailp)
+{
+
+       spin_lock(&ailp->ail_lock);
+       __xfs_ail_assign_tail_lsn(ailp);
+       spin_unlock(&ailp->ail_lock);
+}
+
  #if BITS_PER_LONG != 64
  static inline void
  xfs_trans_ail_copy_lsn(
author	Dave Chinner <dchinner@redhat.com>
	Thu, 20 Jun 2024 07:21:23 +0000 (09:21 +0200)
committer	Chandan Babu R <chandanbabu@kernel.org>
	Thu, 4 Jul 2024 07:16:46 +0000 (12:46 +0530)
fs/xfs/xfs_log.c		patch \| blob \| blame \| history
fs/xfs/xfs_log_cil.c		patch \| blob \| blame \| history
fs/xfs/xfs_log_priv.h		patch \| blob \| blame \| history
fs/xfs/xfs_log_recover.c		patch \| blob \| blame \| history
fs/xfs/xfs_trace.c		patch \| blob \| blame \| history
fs/xfs/xfs_trace.h		patch \| blob \| blame \| history
fs/xfs/xfs_trans_ail.c		patch \| blob \| blame \| history
fs/xfs/xfs_trans_priv.h		patch \| blob \| blame \| history