]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
jbd2: store jinode dirty range in PAGE_SIZE units
author Li Chen <me@linux.beauty>
Fri, 6 Mar 2026 08:56:42 +0000 (16:56 +0800)
committer Theodore Ts'o <tytso@mit.edu>
Thu, 9 Apr 2026 14:52:35 +0000 (10:52 -0400)
jbd2_inode fields are updated under journal->j_list_lock, but some paths
read them without holding the lock (e.g. fast commit helpers and ordered
truncate helpers).

READ_ONCE() alone is not sufficient for the dirty range fields when they
are stored as loff_t because 32-bit platforms can observe torn loads.
Store the dirty range in PAGE_SIZE units as pgoff_t instead.

Represent the dirty range end as an exclusive end page. This avoids a
special sentinel value and keeps MAX_LFS_FILESIZE representable on 32-bit.

Publish a new dirty range by updating end_page before start_page, and
treat start_page >= end_page as empty in the accessor for robustness.

Use READ_ONCE() on the read side and WRITE_ONCE() on the write side for the
dirty range and i_flags to match the existing lockless access pattern.

Suggested-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Li Chen <me@linux.beauty>
Link: https://patch.msgid.link/20260306085643.465275-5-me@linux.beauty
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
fs/jbd2/commit.c
fs/jbd2/journal.c
fs/jbd2/transaction.c
include/linux/jbd2.h

index 7203d2d2624d7afd0d7a3fe7f830845665dd4c14..8cf61e7185c4419cd0b4a036cfdf6e1379cdff99 100644 (file)
@@ -180,7 +180,13 @@ static int journal_wait_on_commit_record(journal_t *journal,
 /* Send all the data buffers related to an inode */
 int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode)
 {
-       if (!jinode || !(jinode->i_flags & JI_WRITE_DATA))
+       unsigned long flags;
+
+       if (!jinode)
+               return 0;
+
+       flags = READ_ONCE(jinode->i_flags);
+       if (!(flags & JI_WRITE_DATA))
                return 0;
 
        trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
@@ -191,12 +197,30 @@ EXPORT_SYMBOL(jbd2_submit_inode_data);
 
 int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode)
 {
-       if (!jinode || !(jinode->i_flags & JI_WAIT_DATA) ||
-               !jinode->i_vfs_inode || !jinode->i_vfs_inode->i_mapping)
+       struct address_space *mapping;
+       struct inode *inode;
+       unsigned long flags;
+       loff_t start_byte, end_byte;
+
+       if (!jinode)
+               return 0;
+
+       flags = READ_ONCE(jinode->i_flags);
+       if (!(flags & JI_WAIT_DATA))
+               return 0;
+
+       inode = jinode->i_vfs_inode;
+       if (!inode)
+               return 0;
+
+       mapping = inode->i_mapping;
+       if (!mapping)
+               return 0;
+
+       if (!jbd2_jinode_get_dirty_range(jinode, &start_byte, &end_byte))
                return 0;
        return filemap_fdatawait_range_keep_errors(
-               jinode->i_vfs_inode->i_mapping, jinode->i_dirty_start,
-               jinode->i_dirty_end);
+               mapping, start_byte, end_byte);
 }
 EXPORT_SYMBOL(jbd2_wait_inode_data);
 
@@ -218,7 +242,8 @@ static int journal_submit_data_buffers(journal_t *journal,
        list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
                if (!(jinode->i_flags & JI_WRITE_DATA))
                        continue;
-               jinode->i_flags |= JI_COMMIT_RUNNING;
+               WRITE_ONCE(jinode->i_flags,
+                          jinode->i_flags | JI_COMMIT_RUNNING);
                spin_unlock(&journal->j_list_lock);
                /* submit the inode data buffers. */
                trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
@@ -229,7 +254,8 @@ static int journal_submit_data_buffers(journal_t *journal,
                }
                spin_lock(&journal->j_list_lock);
                J_ASSERT(jinode->i_transaction == commit_transaction);
-               jinode->i_flags &= ~JI_COMMIT_RUNNING;
+               WRITE_ONCE(jinode->i_flags,
+                          jinode->i_flags & ~JI_COMMIT_RUNNING);
                smp_mb();
                wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
        }
@@ -240,10 +266,13 @@ static int journal_submit_data_buffers(journal_t *journal,
 int jbd2_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
 {
        struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
+       loff_t start_byte, end_byte;
+
+       if (!jbd2_jinode_get_dirty_range(jinode, &start_byte, &end_byte))
+               return 0;
 
        return filemap_fdatawait_range_keep_errors(mapping,
-                                                  jinode->i_dirty_start,
-                                                  jinode->i_dirty_end);
+                                                  start_byte, end_byte);
 }
 
 /*
@@ -262,7 +291,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
        list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
                if (!(jinode->i_flags & JI_WAIT_DATA))
                        continue;
-               jinode->i_flags |= JI_COMMIT_RUNNING;
+               WRITE_ONCE(jinode->i_flags, jinode->i_flags | JI_COMMIT_RUNNING);
                spin_unlock(&journal->j_list_lock);
                /* wait for the inode data buffers writeout. */
                if (journal->j_finish_inode_data_buffers) {
@@ -272,7 +301,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
                }
                cond_resched();
                spin_lock(&journal->j_list_lock);
-               jinode->i_flags &= ~JI_COMMIT_RUNNING;
+               WRITE_ONCE(jinode->i_flags, jinode->i_flags & ~JI_COMMIT_RUNNING);
                smp_mb();
                wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
        }
@@ -288,8 +317,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
                                &jinode->i_transaction->t_inode_list);
                } else {
                        jinode->i_transaction = NULL;
-                       jinode->i_dirty_start = 0;
-                       jinode->i_dirty_end = 0;
+                       WRITE_ONCE(jinode->i_dirty_start_page, 0);
+                       WRITE_ONCE(jinode->i_dirty_end_page, 0);
                }
        }
        spin_unlock(&journal->j_list_lock);
index cb2c529a8f1bea33df6d4135e5782b9a77792732..609c8d965f127edab495dc95fff0a9f674c3bef6 100644 (file)
@@ -3018,8 +3018,8 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
        jinode->i_next_transaction = NULL;
        jinode->i_vfs_inode = inode;
        jinode->i_flags = 0;
-       jinode->i_dirty_start = 0;
-       jinode->i_dirty_end = 0;
+       jinode->i_dirty_start_page = 0;
+       jinode->i_dirty_end_page = 0;
        INIT_LIST_HEAD(&jinode->i_list);
 }
 
@@ -3176,4 +3176,3 @@ MODULE_DESCRIPTION("Generic filesystem journal-writing module");
 MODULE_LICENSE("GPL");
 module_init(journal_init);
 module_exit(journal_exit);
-
index 02cb87dc6fa8a5c91b7e527822141989142ec763..495f0012984459572c3dd30ec4f31bd4c8c91f73 100644 (file)
@@ -2694,6 +2694,7 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
 {
        transaction_t *transaction = handle->h_transaction;
        journal_t *journal;
+       pgoff_t start_page, end_page;
        int err = 0;
        int abort_transaction = 0;
 
@@ -2704,15 +2705,21 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
        jbd2_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
                        transaction->t_tid);
 
+       start_page = (pgoff_t)(start_byte >> PAGE_SHIFT);
+       end_page = (pgoff_t)(end_byte >> PAGE_SHIFT) + 1;
+
        spin_lock(&journal->j_list_lock);
-       jinode->i_flags |= flags;
+       WRITE_ONCE(jinode->i_flags, jinode->i_flags | flags);
 
-       if (jinode->i_dirty_end) {
-               jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte);
-               jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte);
+       if (jinode->i_dirty_start_page != jinode->i_dirty_end_page) {
+               WRITE_ONCE(jinode->i_dirty_start_page,
+                          min(jinode->i_dirty_start_page, start_page));
+               WRITE_ONCE(jinode->i_dirty_end_page,
+                          max(jinode->i_dirty_end_page, end_page));
        } else {
-               jinode->i_dirty_start = start_byte;
-               jinode->i_dirty_end = end_byte;
+               /* Publish a new non-empty range by making end visible first. */
+               WRITE_ONCE(jinode->i_dirty_end_page, end_page);
+               WRITE_ONCE(jinode->i_dirty_start_page, start_page);
        }
 
        /* Is inode already attached where we need it? */
@@ -2802,7 +2809,7 @@ int jbd2_journal_begin_ordered_truncate(journal_t *journal,
        int ret = 0;
 
        /* This is a quick check to avoid locking if not necessary */
-       if (!jinode->i_transaction)
+       if (!READ_ONCE(jinode->i_transaction))
                goto out;
        /* Locks are here just to force reading of recent values, it is
         * enough that the transaction was not committing before we started
index 64392baf5f4b4f0c319bcf816e4be2482bc5d810..7e785aa6d35d652aef555360fcab57fde888826a 100644 (file)
@@ -429,33 +429,43 @@ struct jbd2_inode {
        unsigned long i_flags;
 
        /**
-        * @i_dirty_start:
+        * @i_dirty_start_page:
+        *
+        * Dirty range start in PAGE_SIZE units.
+        *
+        * The dirty range is empty if @i_dirty_start_page is greater than or
+        * equal to @i_dirty_end_page.
         *
-        * Offset in bytes where the dirty range for this inode starts.
         * [j_list_lock]
         */
-       loff_t i_dirty_start;
+       pgoff_t i_dirty_start_page;
 
        /**
-        * @i_dirty_end:
+        * @i_dirty_end_page:
+        *
+        * Dirty range end in PAGE_SIZE units (exclusive).
         *
-        * Inclusive offset in bytes where the dirty range for this inode
-        * ends. [j_list_lock]
+        * [j_list_lock]
         */
-       loff_t i_dirty_end;
+       pgoff_t i_dirty_end_page;
 };
 
+/*
+ * Lockless readers treat start_page >= end_page as an empty range.
+ * Writers publish a new non-empty range by storing i_dirty_end_page before
+ * i_dirty_start_page.
+ */
 static inline bool jbd2_jinode_get_dirty_range(const struct jbd2_inode *jinode,
                                               loff_t *start, loff_t *end)
 {
-       loff_t start_byte = jinode->i_dirty_start;
-       loff_t end_byte = jinode->i_dirty_end;
+       pgoff_t start_page = READ_ONCE(jinode->i_dirty_start_page);
+       pgoff_t end_page = READ_ONCE(jinode->i_dirty_end_page);
 
-       if (!end_byte)
+       if (start_page >= end_page)
                return false;
 
-       *start = start_byte;
-       *end = end_byte;
+       *start = (loff_t)start_page << PAGE_SHIFT;
+       *end = ((loff_t)end_page << PAGE_SHIFT) - 1;
        return true;
 }