git.ipfire.org Git - thirdparty/linux.git/commitdiff
fs: Fold fsync_buffers_list() into sync_mapping_buffers()
author: Jan Kara <jack@suse.cz>
Thu, 26 Mar 2026 09:54:23 +0000 (10:54 +0100)
committer: Christian Brauner <brauner@kernel.org>
Thu, 26 Mar 2026 14:03:30 +0000 (15:03 +0100)
There's only single caller of fsync_buffers_list() so untangle the code
a bit by folding fsync_buffers_list() into sync_mapping_buffers(). Also
merge the comments and update them to reflect current state of code.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260326095354.16340-71-jack@suse.cz
Signed-off-by: Christian Brauner <brauner@kernel.org>
fs/buffer.c

index 1c0e7c81a38bd551cb429ab49c7ba4f8ccb50438..fa3d84084adfcb79d897cc79be095607aeecb61b 100644 (file)
@@ -54,7 +54,6 @@
 
 #include "internal.h"
 
-static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
                          enum rw_hint hint, struct writeback_control *wbc);
 
@@ -531,22 +530,96 @@ EXPORT_SYMBOL_GPL(inode_has_buffers);
  * @mapping: the mapping which wants those buffers written
  *
  * Starts I/O against the buffers at mapping->i_private_list, and waits upon
- * that I/O.
+ * that I/O. Basically, this is a convenience function for fsync().  @mapping
+ * is a file or directory which needs those buffers to be written for a
+ * successful fsync().
  *
- * Basically, this is a convenience function for fsync().
- * @mapping is a file or directory which needs those buffers to be written for
- * a successful fsync().
+ * We have conflicting pressures: we want to make sure that all
+ * initially dirty buffers get waited on, but that any subsequently
+ * dirtied buffers don't.  After all, we don't want fsync to last
+ * forever if somebody is actively writing to the file.
+ *
+ * Do this in two main stages: first we copy dirty buffers to a
+ * temporary inode list, queueing the writes as we go. Then we clean
+ * up, waiting for those writes to complete. mark_buffer_dirty_inode()
+ * doesn't touch b_assoc_buffers list if b_assoc_map is not NULL so we
+ * are sure the buffer stays on our list until IO completes (at which point
+ * it can be reaped).
  */
 int sync_mapping_buffers(struct address_space *mapping)
 {
        struct address_space *buffer_mapping =
                                mapping->host->i_sb->s_bdev->bd_mapping;
+       struct buffer_head *bh;
+       int err = 0;
+       struct blk_plug plug;
+       LIST_HEAD(tmp);
 
        if (list_empty(&mapping->i_private_list))
                return 0;
 
-       return fsync_buffers_list(&buffer_mapping->i_private_lock,
-                                       &mapping->i_private_list);
+       blk_start_plug(&plug);
+
+       spin_lock(&buffer_mapping->i_private_lock);
+       while (!list_empty(&mapping->i_private_list)) {
+               bh = BH_ENTRY(mapping->i_private_list.next);
+               WARN_ON_ONCE(bh->b_assoc_map != mapping);
+               __remove_assoc_queue(bh);
+               /* Avoid race with mark_buffer_dirty_inode() which does
+                * a lockless check and we rely on seeing the dirty bit */
+               smp_mb();
+               if (buffer_dirty(bh) || buffer_locked(bh)) {
+                       list_add(&bh->b_assoc_buffers, &tmp);
+                       bh->b_assoc_map = mapping;
+                       if (buffer_dirty(bh)) {
+                               get_bh(bh);
+                               spin_unlock(&buffer_mapping->i_private_lock);
+                               /*
+                                * Ensure any pending I/O completes so that
+                                * write_dirty_buffer() actually writes the
+                                * current contents - it is a noop if I/O is
+                                * still in flight on potentially older
+                                * contents.
+                                */
+                               write_dirty_buffer(bh, REQ_SYNC);
+
+                               /*
+                                * Kick off IO for the previous mapping. Note
+                                * that we will not run the very last mapping,
+                                * wait_on_buffer() will do that for us
+                                * through sync_buffer().
+                                */
+                               brelse(bh);
+                               spin_lock(&buffer_mapping->i_private_lock);
+                       }
+               }
+       }
+
+       spin_unlock(&buffer_mapping->i_private_lock);
+       blk_finish_plug(&plug);
+       spin_lock(&buffer_mapping->i_private_lock);
+
+       while (!list_empty(&tmp)) {
+               bh = BH_ENTRY(tmp.prev);
+               get_bh(bh);
+               __remove_assoc_queue(bh);
+               /* Avoid race with mark_buffer_dirty_inode() which does
+                * a lockless check and we rely on seeing the dirty bit */
+               smp_mb();
+               if (buffer_dirty(bh)) {
+                       list_add(&bh->b_assoc_buffers,
+                                &mapping->i_private_list);
+                       bh->b_assoc_map = mapping;
+               }
+               spin_unlock(&buffer_mapping->i_private_lock);
+               wait_on_buffer(bh);
+               if (!buffer_uptodate(bh))
+                       err = -EIO;
+               brelse(bh);
+               spin_lock(&buffer_mapping->i_private_lock);
+       }
+       spin_unlock(&buffer_mapping->i_private_lock);
+       return err;
 }
 EXPORT_SYMBOL(sync_mapping_buffers);
 
@@ -719,99 +792,6 @@ bool block_dirty_folio(struct address_space *mapping, struct folio *folio)
 }
 EXPORT_SYMBOL(block_dirty_folio);
 
-/*
- * Write out and wait upon a list of buffers.
- *
- * We have conflicting pressures: we want to make sure that all
- * initially dirty buffers get waited on, but that any subsequently
- * dirtied buffers don't.  After all, we don't want fsync to last
- * forever if somebody is actively writing to the file.
- *
- * Do this in two main stages: first we copy dirty buffers to a
- * temporary inode list, queueing the writes as we go.  Then we clean
- * up, waiting for those writes to complete.
- * 
- * During this second stage, any subsequent updates to the file may end
- * up refiling the buffer on the original inode's dirty list again, so
- * there is a chance we will end up with a buffer queued for write but
- * not yet completed on that list.  So, as a final cleanup we go through
- * the osync code to catch these locked, dirty buffers without requeuing
- * any newly dirty buffers for write.
- */
-static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
-{
-       struct buffer_head *bh;
-       struct address_space *mapping;
-       int err = 0;
-       struct blk_plug plug;
-       LIST_HEAD(tmp);
-
-       blk_start_plug(&plug);
-
-       spin_lock(lock);
-       while (!list_empty(list)) {
-               bh = BH_ENTRY(list->next);
-               mapping = bh->b_assoc_map;
-               __remove_assoc_queue(bh);
-               /* Avoid race with mark_buffer_dirty_inode() which does
-                * a lockless check and we rely on seeing the dirty bit */
-               smp_mb();
-               if (buffer_dirty(bh) || buffer_locked(bh)) {
-                       list_add(&bh->b_assoc_buffers, &tmp);
-                       bh->b_assoc_map = mapping;
-                       if (buffer_dirty(bh)) {
-                               get_bh(bh);
-                               spin_unlock(lock);
-                               /*
-                                * Ensure any pending I/O completes so that
-                                * write_dirty_buffer() actually writes the
-                                * current contents - it is a noop if I/O is
-                                * still in flight on potentially older
-                                * contents.
-                                */
-                               write_dirty_buffer(bh, REQ_SYNC);
-
-                               /*
-                                * Kick off IO for the previous mapping. Note
-                                * that we will not run the very last mapping,
-                                * wait_on_buffer() will do that for us
-                                * through sync_buffer().
-                                */
-                               brelse(bh);
-                               spin_lock(lock);
-                       }
-               }
-       }
-
-       spin_unlock(lock);
-       blk_finish_plug(&plug);
-       spin_lock(lock);
-
-       while (!list_empty(&tmp)) {
-               bh = BH_ENTRY(tmp.prev);
-               get_bh(bh);
-               mapping = bh->b_assoc_map;
-               __remove_assoc_queue(bh);
-               /* Avoid race with mark_buffer_dirty_inode() which does
-                * a lockless check and we rely on seeing the dirty bit */
-               smp_mb();
-               if (buffer_dirty(bh)) {
-                       list_add(&bh->b_assoc_buffers,
-                                &mapping->i_private_list);
-                       bh->b_assoc_map = mapping;
-               }
-               spin_unlock(lock);
-               wait_on_buffer(bh);
-               if (!buffer_uptodate(bh))
-                       err = -EIO;
-               brelse(bh);
-               spin_lock(lock);
-       }
-       
-       spin_unlock(lock);
-       return err;
-}
-
 /*
  * Invalidate any and all dirty buffers on a given inode.  We are
  * probably unmounting the fs, but that doesn't mean we have already