git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - releases/2.6.32.1/0028-ext4-Wait-for-proper-transaction-commit-on-fsync.patch
drop queue-4.14/mips-make-sure-dt-memory-regions-are-valid.patch
[thirdparty/kernel/stable-queue.git] / releases / 2.6.32.1 / 0028-ext4-Wait-for-proper-transaction-commit-on-fsync.patch
1 From ebe71d4fcb5cad29134efb77a36b11a546616104 Mon Sep 17 00:00:00 2001
2 From: Jan Kara <jack@suse.cz>
3 Date: Tue, 8 Dec 2009 23:51:10 -0500
4 Subject: [PATCH 28/30] ext4: Wait for proper transaction commit on fsync
5
6 (cherry picked from commit b436b9bef84de6893e86346d8fbf7104bc520645)
7
8 We cannot rely on buffer dirty bits during fsync because pdflush can run
9 before fsync is called and clear the dirty bits without forcing a transaction
10 commit. Instead, we track which transaction last changed the inode and
11 which transaction last changed its block allocation, and force the relevant
12 one to disk on fsync.
13
14 Signed-off-by: Jan Kara <jack@suse.cz>
15 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
16 Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
17 ---
18 fs/ext4/ext4.h | 7 +++++++
19 fs/ext4/ext4_jbd2.h | 13 +++++++++++++
20 fs/ext4/extents.c | 14 ++++++++++++--
21 fs/ext4/fsync.c | 46 +++++++++++++++++-----------------------------
22 fs/ext4/inode.c | 29 +++++++++++++++++++++++++++++
23 fs/ext4/super.c | 2 ++
24 fs/jbd2/journal.c | 1 +
25 7 files changed, 81 insertions(+), 31 deletions(-)
26
27 --- a/fs/ext4/ext4.h
28 +++ b/fs/ext4/ext4.h
29 @@ -703,6 +703,13 @@ struct ext4_inode_info {
30 struct list_head i_aio_dio_complete_list;
31 /* current io_end structure for async DIO write*/
32 ext4_io_end_t *cur_aio_dio;
33 +
34 + /*
35 + * Transactions that contain inode's metadata needed to complete
36 + * fsync and fdatasync, respectively.
37 + */
38 + tid_t i_sync_tid;
39 + tid_t i_datasync_tid;
40 };
41
42 /*
43 --- a/fs/ext4/ext4_jbd2.h
44 +++ b/fs/ext4/ext4_jbd2.h
45 @@ -258,6 +258,19 @@ static inline int ext4_jbd2_file_inode(h
46 return 0;
47 }
48
49 +static inline void ext4_update_inode_fsync_trans(handle_t *handle,
50 + struct inode *inode,
51 + int datasync)
52 +{
53 + struct ext4_inode_info *ei = EXT4_I(inode);
54 +
55 + if (ext4_handle_valid(handle)) {
56 + ei->i_sync_tid = handle->h_transaction->t_tid;
57 + if (datasync)
58 + ei->i_datasync_tid = handle->h_transaction->t_tid;
59 + }
60 +}
61 +
62 /* super.c */
63 int ext4_force_commit(struct super_block *sb);
64
65 --- a/fs/ext4/extents.c
66 +++ b/fs/ext4/extents.c
67 @@ -3064,6 +3064,8 @@ ext4_ext_handle_uninitialized_extents(ha
68 if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) {
69 ret = ext4_convert_unwritten_extents_dio(handle, inode,
70 path);
71 + if (ret >= 0)
72 + ext4_update_inode_fsync_trans(handle, inode, 1);
73 goto out2;
74 }
75 /* buffered IO case */
76 @@ -3091,6 +3093,8 @@ ext4_ext_handle_uninitialized_extents(ha
77 ret = ext4_ext_convert_to_initialized(handle, inode,
78 path, iblock,
79 max_blocks);
80 + if (ret >= 0)
81 + ext4_update_inode_fsync_trans(handle, inode, 1);
82 out:
83 if (ret <= 0) {
84 err = ret;
85 @@ -3329,10 +3333,16 @@ int ext4_ext_get_blocks(handle_t *handle
86 allocated = ext4_ext_get_actual_len(&newex);
87 set_buffer_new(bh_result);
88
89 - /* Cache only when it is _not_ an uninitialized extent */
90 - if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
91 + /*
92 + * Cache the extent and update transaction to commit on fdatasync only
93 + * when it is _not_ an uninitialized extent.
94 + */
95 + if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
96 ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
97 EXT4_EXT_CACHE_EXTENT);
98 + ext4_update_inode_fsync_trans(handle, inode, 1);
99 + } else
100 + ext4_update_inode_fsync_trans(handle, inode, 0);
101 out:
102 if (allocated > max_blocks)
103 allocated = max_blocks;
104 --- a/fs/ext4/fsync.c
105 +++ b/fs/ext4/fsync.c
106 @@ -51,25 +51,30 @@
107 int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
108 {
109 struct inode *inode = dentry->d_inode;
110 + struct ext4_inode_info *ei = EXT4_I(inode);
111 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
112 - int err, ret = 0;
113 + int ret;
114 + tid_t commit_tid;
115
116 J_ASSERT(ext4_journal_current_handle() == NULL);
117
118 trace_ext4_sync_file(file, dentry, datasync);
119
120 + if (inode->i_sb->s_flags & MS_RDONLY)
121 + return 0;
122 +
123 ret = flush_aio_dio_completed_IO(inode);
124 if (ret < 0)
125 return ret;
126 +
127 + if (!journal)
128 + return simple_fsync(file, dentry, datasync);
129 +
130 /*
131 - * data=writeback:
132 + * data=writeback,ordered:
133 * The caller's filemap_fdatawrite()/wait will sync the data.
134 - * sync_inode() will sync the metadata
135 - *
136 - * data=ordered:
137 - * The caller's filemap_fdatawrite() will write the data and
138 - * sync_inode() will write the inode if it is dirty. Then the caller's
139 - * filemap_fdatawait() will wait on the pages.
140 + * Metadata is in the journal, we wait for proper transaction to
141 + * commit here.
142 *
143 * data=journal:
144 * filemap_fdatawrite won't do anything (the buffers are clean).
145 @@ -82,27 +87,10 @@ int ext4_sync_file(struct file *file, st
146 if (ext4_should_journal_data(inode))
147 return ext4_force_commit(inode->i_sb);
148
149 - if (!journal)
150 - ret = sync_mapping_buffers(inode->i_mapping);
151 -
152 - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
153 - goto out;
154 -
155 - /*
156 - * The VFS has written the file data. If the inode is unaltered
157 - * then we need not start a commit.
158 - */
159 - if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) {
160 - struct writeback_control wbc = {
161 - .sync_mode = WB_SYNC_ALL,
162 - .nr_to_write = 0, /* sys_fsync did this */
163 - };
164 - err = sync_inode(inode, &wbc);
165 - if (ret == 0)
166 - ret = err;
167 - }
168 -out:
169 - if (journal && (journal->j_flags & JBD2_BARRIER))
170 + commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
171 + if (jbd2_log_start_commit(journal, commit_tid))
172 + jbd2_log_wait_commit(journal, commit_tid);
173 + else if (journal->j_flags & JBD2_BARRIER)
174 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
175 return ret;
176 }
177 --- a/fs/ext4/inode.c
178 +++ b/fs/ext4/inode.c
179 @@ -1025,6 +1025,8 @@ static int ext4_ind_get_blocks(handle_t
180 goto cleanup;
181
182 set_buffer_new(bh_result);
183 +
184 + ext4_update_inode_fsync_trans(handle, inode, 1);
185 got_it:
186 map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
187 if (count > blocks_to_boundary)
188 @@ -4794,6 +4796,7 @@ struct inode *ext4_iget(struct super_blo
189 struct ext4_inode *raw_inode;
190 struct ext4_inode_info *ei;
191 struct inode *inode;
192 + journal_t *journal = EXT4_SB(sb)->s_journal;
193 long ret;
194 int block;
195
196 @@ -4858,6 +4861,31 @@ struct inode *ext4_iget(struct super_blo
197 ei->i_data[block] = raw_inode->i_block[block];
198 INIT_LIST_HEAD(&ei->i_orphan);
199
200 + /*
201 + * Set transaction id's of transactions that have to be committed
202 + * to finish f[data]sync. We set them to currently running transaction
203 + * as we cannot be sure that the inode or some of its metadata isn't
204 + * part of the transaction - the inode could have been reclaimed and
205 + * now it is reread from disk.
206 + */
207 + if (journal) {
208 + transaction_t *transaction;
209 + tid_t tid;
210 +
211 + spin_lock(&journal->j_state_lock);
212 + if (journal->j_running_transaction)
213 + transaction = journal->j_running_transaction;
214 + else
215 + transaction = journal->j_committing_transaction;
216 + if (transaction)
217 + tid = transaction->t_tid;
218 + else
219 + tid = journal->j_commit_sequence;
220 + spin_unlock(&journal->j_state_lock);
221 + ei->i_sync_tid = tid;
222 + ei->i_datasync_tid = tid;
223 + }
224 +
225 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
226 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
227 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
228 @@ -5112,6 +5140,7 @@ static int ext4_do_update_inode(handle_t
229 err = rc;
230 ei->i_state &= ~EXT4_STATE_NEW;
231
232 + ext4_update_inode_fsync_trans(handle, inode, 0);
233 out_brelse:
234 brelse(bh);
235 ext4_std_error(inode->i_sb, err);
236 --- a/fs/ext4/super.c
237 +++ b/fs/ext4/super.c
238 @@ -706,6 +706,8 @@ static struct inode *ext4_alloc_inode(st
239 spin_lock_init(&(ei->i_block_reservation_lock));
240 INIT_LIST_HEAD(&ei->i_aio_dio_complete_list);
241 ei->cur_aio_dio = NULL;
242 + ei->i_sync_tid = 0;
243 + ei->i_datasync_tid = 0;
244
245 return &ei->vfs_inode;
246 }
247 --- a/fs/jbd2/journal.c
248 +++ b/fs/jbd2/journal.c
249 @@ -78,6 +78,7 @@ EXPORT_SYMBOL(jbd2_journal_errno);
250 EXPORT_SYMBOL(jbd2_journal_ack_err);
251 EXPORT_SYMBOL(jbd2_journal_clear_err);
252 EXPORT_SYMBOL(jbd2_log_wait_commit);
253 +EXPORT_SYMBOL(jbd2_log_start_commit);
254 EXPORT_SYMBOL(jbd2_journal_start_commit);
255 EXPORT_SYMBOL(jbd2_journal_force_commit_nested);
256 EXPORT_SYMBOL(jbd2_journal_wipe);