]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - releases/2.6.32.19/btrfs-fix-disk_i_size-update-corner-case.patch
4.14-stable patches
[thirdparty/kernel/stable-queue.git] / releases / 2.6.32.19 / btrfs-fix-disk_i_size-update-corner-case.patch
1 From c216775458a2ee345d9412a2770c2916acfb5d30 Mon Sep 17 00:00:00 2001
2 From: Yan, Zheng <zheng.yan@oracle.com>
3 Date: Thu, 12 Nov 2009 09:34:21 +0000
4 Subject: Btrfs: Fix disk_i_size update corner case
5
6 From: Yan, Zheng <zheng.yan@oracle.com>
7
8 commit c216775458a2ee345d9412a2770c2916acfb5d30 upstream.
9
10 There are some cases where file extents are inserted without involving
11 an ordered struct. In these cases, we update disk_i_size directly,
12 without checking for pending ordered extents or the DELALLOC bit. This
13 patch extends btrfs_ordered_update_i_size() to handle these cases.
14
15 Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
16 Signed-off-by: Chris Mason <chris.mason@oracle.com>
17 Acked-by: Jeff Mahoney <jeffm@suse.com>
18 Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
19
20 ---
21 fs/btrfs/btrfs_inode.h | 5 --
22 fs/btrfs/inode.c | 71 ++++++++++++++++++++------------
23 fs/btrfs/ordered-data.c | 105 +++++++++++++++++++++++++++++++++++++-----------
24 fs/btrfs/ordered-data.h | 2
25 4 files changed, 127 insertions(+), 56 deletions(-)
26
27 --- a/fs/btrfs/btrfs_inode.h
28 +++ b/fs/btrfs/btrfs_inode.h
29 @@ -44,9 +44,6 @@ struct btrfs_inode {
30 */
31 struct extent_io_tree io_failure_tree;
32
33 - /* held while inesrting or deleting extents from files */
34 - struct mutex extent_mutex;
35 -
36 /* held while logging the inode in tree-log.c */
37 struct mutex log_mutex;
38
39 @@ -166,7 +163,7 @@ static inline struct btrfs_inode *BTRFS_
40
41 static inline void btrfs_i_size_write(struct inode *inode, u64 size)
42 {
43 - inode->i_size = size;
44 + i_size_write(inode, size);
45 BTRFS_I(inode)->disk_i_size = size;
46 }
47
48 --- a/fs/btrfs/inode.c
49 +++ b/fs/btrfs/inode.c
50 @@ -188,8 +188,18 @@ static noinline int insert_inline_extent
51 btrfs_mark_buffer_dirty(leaf);
52 btrfs_free_path(path);
53
54 + /*
55 + * we're an inline extent, so nobody can
56 + * extend the file past i_size without locking
57 + * a page we already have locked.
58 + *
59 + * We must do any isize and inode updates
60 + * before we unlock the pages. Otherwise we
61 + * could end up racing with unlink.
62 + */
63 BTRFS_I(inode)->disk_i_size = inode->i_size;
64 btrfs_update_inode(trans, root, inode);
65 +
66 return 0;
67 fail:
68 btrfs_free_path(path);
69 @@ -415,7 +425,6 @@ again:
70 start, end,
71 total_compressed, pages);
72 }
73 - btrfs_end_transaction(trans, root);
74 if (ret == 0) {
75 /*
76 * inline extent creation worked, we don't need
77 @@ -429,9 +438,11 @@ again:
78 EXTENT_CLEAR_DELALLOC |
79 EXTENT_CLEAR_ACCOUNTING |
80 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
81 - ret = 0;
82 +
83 + btrfs_end_transaction(trans, root);
84 goto free_pages_out;
85 }
86 + btrfs_end_transaction(trans, root);
87 }
88
89 if (will_compress) {
90 @@ -542,7 +553,6 @@ static noinline int submit_compressed_ex
91 if (list_empty(&async_cow->extents))
92 return 0;
93
94 - trans = btrfs_join_transaction(root, 1);
95
96 while (!list_empty(&async_cow->extents)) {
97 async_extent = list_entry(async_cow->extents.next,
98 @@ -589,19 +599,15 @@ retry:
99 lock_extent(io_tree, async_extent->start,
100 async_extent->start + async_extent->ram_size - 1,
101 GFP_NOFS);
102 - /*
103 - * here we're doing allocation and writeback of the
104 - * compressed pages
105 - */
106 - btrfs_drop_extent_cache(inode, async_extent->start,
107 - async_extent->start +
108 - async_extent->ram_size - 1, 0);
109
110 + trans = btrfs_join_transaction(root, 1);
111 ret = btrfs_reserve_extent(trans, root,
112 async_extent->compressed_size,
113 async_extent->compressed_size,
114 0, alloc_hint,
115 (u64)-1, &ins, 1);
116 + btrfs_end_transaction(trans, root);
117 +
118 if (ret) {
119 int i;
120 for (i = 0; i < async_extent->nr_pages; i++) {
121 @@ -617,6 +623,14 @@ retry:
122 goto retry;
123 }
124
125 + /*
126 + * here we're doing allocation and writeback of the
127 + * compressed pages
128 + */
129 + btrfs_drop_extent_cache(inode, async_extent->start,
130 + async_extent->start +
131 + async_extent->ram_size - 1, 0);
132 +
133 em = alloc_extent_map(GFP_NOFS);
134 em->start = async_extent->start;
135 em->len = async_extent->ram_size;
136 @@ -648,8 +662,6 @@ retry:
137 BTRFS_ORDERED_COMPRESSED);
138 BUG_ON(ret);
139
140 - btrfs_end_transaction(trans, root);
141 -
142 /*
143 * clear dirty, set writeback and unlock the pages.
144 */
145 @@ -671,13 +683,11 @@ retry:
146 async_extent->nr_pages);
147
148 BUG_ON(ret);
149 - trans = btrfs_join_transaction(root, 1);
150 alloc_hint = ins.objectid + ins.offset;
151 kfree(async_extent);
152 cond_resched();
153 }
154
155 - btrfs_end_transaction(trans, root);
156 return 0;
157 }
158
159 @@ -741,6 +751,7 @@ static noinline int cow_file_range(struc
160 EXTENT_CLEAR_DIRTY |
161 EXTENT_SET_WRITEBACK |
162 EXTENT_END_WRITEBACK);
163 +
164 *nr_written = *nr_written +
165 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
166 *page_started = 1;
167 @@ -1727,18 +1738,27 @@ static int btrfs_finish_ordered_io(struc
168 }
169 }
170
171 - trans = btrfs_join_transaction(root, 1);
172 -
173 if (!ordered_extent)
174 ordered_extent = btrfs_lookup_ordered_extent(inode, start);
175 BUG_ON(!ordered_extent);
176 - if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags))
177 - goto nocow;
178 + if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
179 + BUG_ON(!list_empty(&ordered_extent->list));
180 + ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
181 + if (!ret) {
182 + trans = btrfs_join_transaction(root, 1);
183 + ret = btrfs_update_inode(trans, root, inode);
184 + BUG_ON(ret);
185 + btrfs_end_transaction(trans, root);
186 + }
187 + goto out;
188 + }
189
190 lock_extent(io_tree, ordered_extent->file_offset,
191 ordered_extent->file_offset + ordered_extent->len - 1,
192 GFP_NOFS);
193
194 + trans = btrfs_join_transaction(root, 1);
195 +
196 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
197 compressed = 1;
198 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
199 @@ -1765,22 +1785,20 @@ static int btrfs_finish_ordered_io(struc
200 unlock_extent(io_tree, ordered_extent->file_offset,
201 ordered_extent->file_offset + ordered_extent->len - 1,
202 GFP_NOFS);
203 -nocow:
204 add_pending_csums(trans, inode, ordered_extent->file_offset,
205 &ordered_extent->list);
206
207 - mutex_lock(&BTRFS_I(inode)->extent_mutex);
208 - btrfs_ordered_update_i_size(inode, ordered_extent);
209 - btrfs_update_inode(trans, root, inode);
210 - btrfs_remove_ordered_extent(inode, ordered_extent);
211 - mutex_unlock(&BTRFS_I(inode)->extent_mutex);
212 -
213 + /* this also removes the ordered extent from the tree */
214 + btrfs_ordered_update_i_size(inode, 0, ordered_extent);
215 + ret = btrfs_update_inode(trans, root, inode);
216 + BUG_ON(ret);
217 + btrfs_end_transaction(trans, root);
218 +out:
219 /* once for us */
220 btrfs_put_ordered_extent(ordered_extent);
221 /* once for the tree */
222 btrfs_put_ordered_extent(ordered_extent);
223
224 - btrfs_end_transaction(trans, root);
225 return 0;
226 }
227
228 @@ -3562,7 +3580,6 @@ static noinline void init_btrfs_i(struct
229 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
230 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
231 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
232 - mutex_init(&BTRFS_I(inode)->extent_mutex);
233 mutex_init(&BTRFS_I(inode)->log_mutex);
234 }
235
236 --- a/fs/btrfs/ordered-data.c
237 +++ b/fs/btrfs/ordered-data.c
238 @@ -291,16 +291,16 @@ int btrfs_put_ordered_extent(struct btrf
239
240 /*
241 * remove an ordered extent from the tree. No references are dropped
242 - * but, anyone waiting on this extent is woken up.
243 + * and you must wake_up entry->wait. You must hold the tree mutex
244 + * while you call this function.
245 */
246 -int btrfs_remove_ordered_extent(struct inode *inode,
247 +static int __btrfs_remove_ordered_extent(struct inode *inode,
248 struct btrfs_ordered_extent *entry)
249 {
250 struct btrfs_ordered_inode_tree *tree;
251 struct rb_node *node;
252
253 tree = &BTRFS_I(inode)->ordered_tree;
254 - mutex_lock(&tree->mutex);
255 node = &entry->rb_node;
256 rb_erase(node, &tree->tree);
257 tree->last = NULL;
258 @@ -326,9 +326,26 @@ int btrfs_remove_ordered_extent(struct i
259 }
260 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
261
262 + return 0;
263 +}
264 +
265 +/*
266 + * remove an ordered extent from the tree. No references are dropped
267 + * but any waiters are woken.
268 + */
269 +int btrfs_remove_ordered_extent(struct inode *inode,
270 + struct btrfs_ordered_extent *entry)
271 +{
272 + struct btrfs_ordered_inode_tree *tree;
273 + int ret;
274 +
275 + tree = &BTRFS_I(inode)->ordered_tree;
276 + mutex_lock(&tree->mutex);
277 + ret = __btrfs_remove_ordered_extent(inode, entry);
278 mutex_unlock(&tree->mutex);
279 wake_up(&entry->wait);
280 - return 0;
281 +
282 + return ret;
283 }
284
285 /*
286 @@ -589,7 +606,7 @@ out:
287 * After an extent is done, call this to conditionally update the on disk
288 * i_size. i_size is updated to cover any fully written part of the file.
289 */
290 -int btrfs_ordered_update_i_size(struct inode *inode,
291 +int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
292 struct btrfs_ordered_extent *ordered)
293 {
294 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
295 @@ -597,18 +614,30 @@ int btrfs_ordered_update_i_size(struct i
296 u64 disk_i_size;
297 u64 new_i_size;
298 u64 i_size_test;
299 + u64 i_size = i_size_read(inode);
300 struct rb_node *node;
301 + struct rb_node *prev = NULL;
302 struct btrfs_ordered_extent *test;
303 + int ret = 1;
304 +
305 + if (ordered)
306 + offset = entry_end(ordered);
307
308 mutex_lock(&tree->mutex);
309 disk_i_size = BTRFS_I(inode)->disk_i_size;
310
311 + /* truncate file */
312 + if (disk_i_size > i_size) {
313 + BTRFS_I(inode)->disk_i_size = i_size;
314 + ret = 0;
315 + goto out;
316 + }
317 +
318 /*
319 * if the disk i_size is already at the inode->i_size, or
320 * this ordered extent is inside the disk i_size, we're done
321 */
322 - if (disk_i_size >= inode->i_size ||
323 - ordered->file_offset + ordered->len <= disk_i_size) {
324 + if (disk_i_size == i_size || offset <= disk_i_size) {
325 goto out;
326 }
327
328 @@ -616,8 +645,7 @@ int btrfs_ordered_update_i_size(struct i
329 * we can't update the disk_isize if there are delalloc bytes
330 * between disk_i_size and this ordered extent
331 */
332 - if (test_range_bit(io_tree, disk_i_size,
333 - ordered->file_offset + ordered->len - 1,
334 + if (test_range_bit(io_tree, disk_i_size, offset - 1,
335 EXTENT_DELALLOC, 0, NULL)) {
336 goto out;
337 }
338 @@ -626,20 +654,32 @@ int btrfs_ordered_update_i_size(struct i
339 * if we find an ordered extent then we can't update disk i_size
340 * yet
341 */
342 - node = &ordered->rb_node;
343 - while (1) {
344 - node = rb_prev(node);
345 - if (!node)
346 - break;
347 + if (ordered) {
348 + node = rb_prev(&ordered->rb_node);
349 + } else {
350 + prev = tree_search(tree, offset);
351 + /*
352 + * we insert file extents without involving ordered struct,
353 + * so there should be no ordered struct cover this offset
354 + */
355 + if (prev) {
356 + test = rb_entry(prev, struct btrfs_ordered_extent,
357 + rb_node);
358 + BUG_ON(offset_in_entry(test, offset));
359 + }
360 + node = prev;
361 + }
362 + while (node) {
363 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
364 if (test->file_offset + test->len <= disk_i_size)
365 break;
366 - if (test->file_offset >= inode->i_size)
367 + if (test->file_offset >= i_size)
368 break;
369 if (test->file_offset >= disk_i_size)
370 goto out;
371 + node = rb_prev(node);
372 }
373 - new_i_size = min_t(u64, entry_end(ordered), i_size_read(inode));
374 + new_i_size = min_t(u64, offset, i_size);
375
376 /*
377 * at this point, we know we can safely update i_size to at least
378 @@ -647,7 +687,14 @@ int btrfs_ordered_update_i_size(struct i
379 * walk forward and see if ios from higher up in the file have
380 * finished.
381 */
382 - node = rb_next(&ordered->rb_node);
383 + if (ordered) {
384 + node = rb_next(&ordered->rb_node);
385 + } else {
386 + if (prev)
387 + node = rb_next(prev);
388 + else
389 + node = rb_first(&tree->tree);
390 + }
391 i_size_test = 0;
392 if (node) {
393 /*
394 @@ -655,10 +702,10 @@ int btrfs_ordered_update_i_size(struct i
395 * between our ordered extent and the next one.
396 */
397 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
398 - if (test->file_offset > entry_end(ordered))
399 + if (test->file_offset > offset)
400 i_size_test = test->file_offset;
401 } else {
402 - i_size_test = i_size_read(inode);
403 + i_size_test = i_size;
404 }
405
406 /*
407 @@ -667,15 +714,25 @@ int btrfs_ordered_update_i_size(struct i
408 * are no delalloc bytes in this area, it is safe to update
409 * disk_i_size to the end of the region.
410 */
411 - if (i_size_test > entry_end(ordered) &&
412 - !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1,
413 - EXTENT_DELALLOC, 0, NULL)) {
414 - new_i_size = min_t(u64, i_size_test, i_size_read(inode));
415 + if (i_size_test > offset &&
416 + !test_range_bit(io_tree, offset, i_size_test - 1,
417 + EXTENT_DELALLOC, 0, NULL)) {
418 + new_i_size = min_t(u64, i_size_test, i_size);
419 }
420 BTRFS_I(inode)->disk_i_size = new_i_size;
421 + ret = 0;
422 out:
423 + /*
424 + * we need to remove the ordered extent with the tree lock held
425 + * so that other people calling this function don't find our fully
426 + * processed ordered entry and skip updating the i_size
427 + */
428 + if (ordered)
429 + __btrfs_remove_ordered_extent(inode, ordered);
430 mutex_unlock(&tree->mutex);
431 - return 0;
432 + if (ordered)
433 + wake_up(&ordered->wait);
434 + return ret;
435 }
436
437 /*
438 --- a/fs/btrfs/ordered-data.h
439 +++ b/fs/btrfs/ordered-data.h
440 @@ -150,7 +150,7 @@ void btrfs_start_ordered_extent(struct i
441 int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
442 struct btrfs_ordered_extent *
443 btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
444 -int btrfs_ordered_update_i_size(struct inode *inode,
445 +int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
446 struct btrfs_ordered_extent *ordered);
447 int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
448 int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only);