#include "internal.h"
-static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
enum rw_hint hint, struct writeback_control *wbc);
* @mapping: the mapping which wants those buffers written
*
* Starts I/O against the buffers at mapping->i_private_list, and waits upon
- * that I/O.
+ * that I/O. Basically, this is a convenience function for fsync(). @mapping
+ * is a file or directory which needs those buffers to be written for a
+ * successful fsync().
*
- * Basically, this is a convenience function for fsync().
- * @mapping is a file or directory which needs those buffers to be written for
- * a successful fsync().
+ * We have conflicting pressures: we want to make sure that all
+ * initially dirty buffers get waited on, but that any subsequently
+ * dirtied buffers don't. After all, we don't want fsync to last
+ * forever if somebody is actively writing to the file.
+ *
+ * Do this in two main stages: first we copy dirty buffers to a
+ * temporary inode list, queueing the writes as we go. Then we clean
+ * up, waiting for those writes to complete. mark_buffer_dirty_inode()
+ * doesn't touch b_assoc_buffers list if b_assoc_map is not NULL so we
+ * are sure the buffer stays on our list until IO completes (at which point
+ * it can be reaped).
*/
int sync_mapping_buffers(struct address_space *mapping)
{
struct address_space *buffer_mapping =
mapping->host->i_sb->s_bdev->bd_mapping;
+ struct buffer_head *bh;
+ int err = 0;
+ struct blk_plug plug;
+ LIST_HEAD(tmp);
if (list_empty(&mapping->i_private_list))
return 0;
- return fsync_buffers_list(&buffer_mapping->i_private_lock,
- &mapping->i_private_list);
+ blk_start_plug(&plug);
+
+ spin_lock(&buffer_mapping->i_private_lock);
+ while (!list_empty(&mapping->i_private_list)) {
+ bh = BH_ENTRY(mapping->i_private_list.next);
+ WARN_ON_ONCE(bh->b_assoc_map != mapping);
+ __remove_assoc_queue(bh);
+ /* Avoid race with mark_buffer_dirty_inode() which does
+ * a lockless check and we rely on seeing the dirty bit */
+ smp_mb();
+ if (buffer_dirty(bh) || buffer_locked(bh)) {
+ list_add(&bh->b_assoc_buffers, &tmp);
+ bh->b_assoc_map = mapping;
+ if (buffer_dirty(bh)) {
+ get_bh(bh);
+ spin_unlock(&buffer_mapping->i_private_lock);
+ /*
+ * Ensure any pending I/O completes so that
+ * write_dirty_buffer() actually writes the
+ * current contents - it is a noop if I/O is
+ * still in flight on potentially older
+ * contents.
+ */
+ write_dirty_buffer(bh, REQ_SYNC);
+
+ /*
+ * Kick off IO for the previous mapping. Note
+ * that we will not run the very last mapping,
+ * wait_on_buffer() will do that for us
+ * through sync_buffer().
+ */
+ brelse(bh);
+ spin_lock(&buffer_mapping->i_private_lock);
+ }
+ }
+ }
+
+ spin_unlock(&buffer_mapping->i_private_lock);
+ blk_finish_plug(&plug);
+ spin_lock(&buffer_mapping->i_private_lock);
+
+ while (!list_empty(&tmp)) {
+ bh = BH_ENTRY(tmp.prev);
+ get_bh(bh);
+ __remove_assoc_queue(bh);
+ /* Avoid race with mark_buffer_dirty_inode() which does
+ * a lockless check and we rely on seeing the dirty bit */
+ smp_mb();
+ if (buffer_dirty(bh)) {
+ list_add(&bh->b_assoc_buffers,
+ &mapping->i_private_list);
+ bh->b_assoc_map = mapping;
+ }
+ spin_unlock(&buffer_mapping->i_private_lock);
+ wait_on_buffer(bh);
+ if (!buffer_uptodate(bh))
+ err = -EIO;
+ brelse(bh);
+ spin_lock(&buffer_mapping->i_private_lock);
+ }
+ spin_unlock(&buffer_mapping->i_private_lock);
+ return err;
}
EXPORT_SYMBOL(sync_mapping_buffers);
}
EXPORT_SYMBOL(block_dirty_folio);
-/*
- * Write out and wait upon a list of buffers.
- *
- * We have conflicting pressures: we want to make sure that all
- * initially dirty buffers get waited on, but that any subsequently
- * dirtied buffers don't. After all, we don't want fsync to last
- * forever if somebody is actively writing to the file.
- *
- * Do this in two main stages: first we copy dirty buffers to a
- * temporary inode list, queueing the writes as we go. Then we clean
- * up, waiting for those writes to complete.
- *
- * During this second stage, any subsequent updates to the file may end
- * up refiling the buffer on the original inode's dirty list again, so
- * there is a chance we will end up with a buffer queued for write but
- * not yet completed on that list. So, as a final cleanup we go through
- * the osync code to catch these locked, dirty buffers without requeuing
- * any newly dirty buffers for write.
- */
-static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
-{
- struct buffer_head *bh;
- struct address_space *mapping;
- int err = 0;
- struct blk_plug plug;
- LIST_HEAD(tmp);
-
- blk_start_plug(&plug);
-
- spin_lock(lock);
- while (!list_empty(list)) {
- bh = BH_ENTRY(list->next);
- mapping = bh->b_assoc_map;
- __remove_assoc_queue(bh);
- /* Avoid race with mark_buffer_dirty_inode() which does
- * a lockless check and we rely on seeing the dirty bit */
- smp_mb();
- if (buffer_dirty(bh) || buffer_locked(bh)) {
- list_add(&bh->b_assoc_buffers, &tmp);
- bh->b_assoc_map = mapping;
- if (buffer_dirty(bh)) {
- get_bh(bh);
- spin_unlock(lock);
- /*
- * Ensure any pending I/O completes so that
- * write_dirty_buffer() actually writes the
- * current contents - it is a noop if I/O is
- * still in flight on potentially older
- * contents.
- */
- write_dirty_buffer(bh, REQ_SYNC);
-
- /*
- * Kick off IO for the previous mapping. Note
- * that we will not run the very last mapping,
- * wait_on_buffer() will do that for us
- * through sync_buffer().
- */
- brelse(bh);
- spin_lock(lock);
- }
- }
- }
-
- spin_unlock(lock);
- blk_finish_plug(&plug);
- spin_lock(lock);
-
- while (!list_empty(&tmp)) {
- bh = BH_ENTRY(tmp.prev);
- get_bh(bh);
- mapping = bh->b_assoc_map;
- __remove_assoc_queue(bh);
- /* Avoid race with mark_buffer_dirty_inode() which does
- * a lockless check and we rely on seeing the dirty bit */
- smp_mb();
- if (buffer_dirty(bh)) {
- list_add(&bh->b_assoc_buffers,
- &mapping->i_private_list);
- bh->b_assoc_map = mapping;
- }
- spin_unlock(lock);
- wait_on_buffer(bh);
- if (!buffer_uptodate(bh))
- err = -EIO;
- brelse(bh);
- spin_lock(lock);
- }
-
- spin_unlock(lock);
- return err;
-}
-
/*
* Invalidate any and all dirty buffers on a given inode. We are
* probably unmounting the fs, but that doesn't mean we have already