]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
jbd2: introduce jbd2_inode dirty range scoping
authorRoss Zwisler <zwisler@chromium.org>
Thu, 20 Jun 2019 21:24:56 +0000 (17:24 -0400)
committerTheodore Ts'o <tytso@mit.edu>
Thu, 20 Jun 2019 21:24:56 +0000 (17:24 -0400)
Currently both journal_submit_inode_data_buffers() and
journal_finish_inode_data_buffers() operate on the entire address space
of each of the inodes associated with a given journal entry.  The
consequence of this is that if we have an inode where we are constantly
appending dirty pages we can end up waiting for an indefinite amount of
time in journal_finish_inode_data_buffers() while we wait for all the
pages under writeback to be written out.

The easiest way to cause this type of workload is do just dd from
/dev/zero to a file until it fills the entire filesystem.  This can
cause journal_finish_inode_data_buffers() to wait for the duration of
the entire dd operation.

We can improve this situation by scoping each of the inode dirty ranges
associated with a given transaction.  We do this via the jbd2_inode
structure so that the scoping is contained within jbd2 and so that it
follows the lifetime and locking rules for that structure.

This allows us to limit the writeback & wait in
journal_submit_inode_data_buffers() and
journal_finish_inode_data_buffers() respectively to the dirty range for
a given struct jdb2_inode, keeping us from waiting forever if the inode
in question is still being appended to.

Signed-off-by: Ross Zwisler <zwisler@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: stable@vger.kernel.org
fs/jbd2/commit.c
fs/jbd2/journal.c
fs/jbd2/transaction.c
include/linux/jbd2.h

index c8c1d6cc6e5dc7c8a0b127aace3c56b006d38a6b..132fb92098c7186b2d7d4abb95432612dba43d4e 100644 (file)
@@ -187,14 +187,15 @@ static int journal_wait_on_commit_record(journal_t *journal,
  * use writepages() because with delayed allocation we may be doing
  * block allocation in writepages().
  */
-static int journal_submit_inode_data_buffers(struct address_space *mapping)
+static int journal_submit_inode_data_buffers(struct address_space *mapping,
+               loff_t dirty_start, loff_t dirty_end)
 {
        int ret;
        struct writeback_control wbc = {
                .sync_mode =  WB_SYNC_ALL,
                .nr_to_write = mapping->nrpages * 2,
-               .range_start = 0,
-               .range_end = i_size_read(mapping->host),
+               .range_start = dirty_start,
+               .range_end = dirty_end,
        };
 
        ret = generic_writepages(mapping, &wbc);
@@ -218,6 +219,9 @@ static int journal_submit_data_buffers(journal_t *journal,
 
        spin_lock(&journal->j_list_lock);
        list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
+               loff_t dirty_start = jinode->i_dirty_start;
+               loff_t dirty_end = jinode->i_dirty_end;
+
                if (!(jinode->i_flags & JI_WRITE_DATA))
                        continue;
                mapping = jinode->i_vfs_inode->i_mapping;
@@ -230,7 +234,8 @@ static int journal_submit_data_buffers(journal_t *journal,
                 * only allocated blocks here.
                 */
                trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
-               err = journal_submit_inode_data_buffers(mapping);
+               err = journal_submit_inode_data_buffers(mapping, dirty_start,
+                               dirty_end);
                if (!ret)
                        ret = err;
                spin_lock(&journal->j_list_lock);
@@ -257,12 +262,16 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
        /* For locking, see the comment in journal_submit_data_buffers() */
        spin_lock(&journal->j_list_lock);
        list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
+               loff_t dirty_start = jinode->i_dirty_start;
+               loff_t dirty_end = jinode->i_dirty_end;
+
                if (!(jinode->i_flags & JI_WAIT_DATA))
                        continue;
                jinode->i_flags |= JI_COMMIT_RUNNING;
                spin_unlock(&journal->j_list_lock);
-               err = filemap_fdatawait_keep_errors(
-                               jinode->i_vfs_inode->i_mapping);
+               err = filemap_fdatawait_range_keep_errors(
+                               jinode->i_vfs_inode->i_mapping, dirty_start,
+                               dirty_end);
                if (!ret)
                        ret = err;
                spin_lock(&journal->j_list_lock);
@@ -282,6 +291,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
                                &jinode->i_transaction->t_inode_list);
                } else {
                        jinode->i_transaction = NULL;
+                       jinode->i_dirty_start = 0;
+                       jinode->i_dirty_end = 0;
                }
        }
        spin_unlock(&journal->j_list_lock);
index 38b426c5ed03a09b970268e0097cad77668f0074..17f679aeba7c7a6ae0c6eeb6003cd68bac560228 100644 (file)
@@ -94,6 +94,8 @@ EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
 EXPORT_SYMBOL(jbd2_journal_force_commit);
 EXPORT_SYMBOL(jbd2_journal_inode_add_write);
 EXPORT_SYMBOL(jbd2_journal_inode_add_wait);
+EXPORT_SYMBOL(jbd2_journal_inode_ranged_write);
+EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait);
 EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
 EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
 EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
@@ -2574,6 +2576,8 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
        jinode->i_next_transaction = NULL;
        jinode->i_vfs_inode = inode;
        jinode->i_flags = 0;
+       jinode->i_dirty_start = 0;
+       jinode->i_dirty_end = 0;
        INIT_LIST_HEAD(&jinode->i_list);
 }
 
index 8ca4fddc705fe999b80f90cf4968001af443f72c..990e7b5062e7485f79db417459dcd15e27081a07 100644 (file)
@@ -2565,7 +2565,7 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
  * File inode in the inode list of the handle's transaction
  */
 static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
-                                  unsigned long flags)
+               unsigned long flags, loff_t start_byte, loff_t end_byte)
 {
        transaction_t *transaction = handle->h_transaction;
        journal_t *journal;
@@ -2577,26 +2577,17 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
        jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
                        transaction->t_tid);
 
-       /*
-        * First check whether inode isn't already on the transaction's
-        * lists without taking the lock. Note that this check is safe
-        * without the lock as we cannot race with somebody removing inode
-        * from the transaction. The reason is that we remove inode from the
-        * transaction only in journal_release_jbd_inode() and when we commit
-        * the transaction. We are guarded from the first case by holding
-        * a reference to the inode. We are safe against the second case
-        * because if jinode->i_transaction == transaction, commit code
-        * cannot touch the transaction because we hold reference to it,
-        * and if jinode->i_next_transaction == transaction, commit code
-        * will only file the inode where we want it.
-        */
-       if ((jinode->i_transaction == transaction ||
-           jinode->i_next_transaction == transaction) &&
-           (jinode->i_flags & flags) == flags)
-               return 0;
-
        spin_lock(&journal->j_list_lock);
        jinode->i_flags |= flags;
+
+       if (jinode->i_dirty_end) {
+               jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte);
+               jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte);
+       } else {
+               jinode->i_dirty_start = start_byte;
+               jinode->i_dirty_end = end_byte;
+       }
+
        /* Is inode already attached where we need it? */
        if (jinode->i_transaction == transaction ||
            jinode->i_next_transaction == transaction)
@@ -2631,12 +2622,28 @@ done:
 int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode)
 {
        return jbd2_journal_file_inode(handle, jinode,
-                                      JI_WRITE_DATA | JI_WAIT_DATA);
+                       JI_WRITE_DATA | JI_WAIT_DATA, 0, LLONG_MAX);
 }
 
 int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode)
 {
-       return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA);
+       return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, 0,
+                       LLONG_MAX);
+}
+
+int jbd2_journal_inode_ranged_write(handle_t *handle,
+               struct jbd2_inode *jinode, loff_t start_byte, loff_t length)
+{
+       return jbd2_journal_file_inode(handle, jinode,
+                       JI_WRITE_DATA | JI_WAIT_DATA, start_byte,
+                       start_byte + length - 1);
+}
+
+int jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *jinode,
+               loff_t start_byte, loff_t length)
+{
+       return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA,
+                       start_byte, start_byte + length - 1);
 }
 
 /*
index 5c04181b7c6d8dd6ac5b3a4ebbceaf8577aefe2c..0e0393e7f41a438bd37f25f4797a3707db52c055 100644 (file)
@@ -451,6 +451,22 @@ struct jbd2_inode {
         * @i_flags: Flags of inode [j_list_lock]
         */
        unsigned long i_flags;
+
+       /**
+        * @i_dirty_start:
+        *
+        * Offset in bytes where the dirty range for this inode starts.
+        * [j_list_lock]
+        */
+       loff_t i_dirty_start;
+
+       /**
+        * @i_dirty_end:
+        *
+        * Inclusive offset in bytes where the dirty range for this inode
+        * ends. [j_list_lock]
+        */
+       loff_t i_dirty_end;
 };
 
 struct jbd2_revoke_table_s;
@@ -1397,6 +1413,12 @@ extern int          jbd2_journal_force_commit(journal_t *);
 extern int        jbd2_journal_force_commit_nested(journal_t *);
 extern int        jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *inode);
 extern int        jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *inode);
+extern int        jbd2_journal_inode_ranged_write(handle_t *handle,
+                       struct jbd2_inode *inode, loff_t start_byte,
+                       loff_t length);
+extern int        jbd2_journal_inode_ranged_wait(handle_t *handle,
+                       struct jbd2_inode *inode, loff_t start_byte,
+                       loff_t length);
 extern int        jbd2_journal_begin_ordered_truncate(journal_t *journal,
                                struct jbd2_inode *inode, loff_t new_size);
 extern void       jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);