From 74ded2cc0427839ccdda41f2738130f0eea77fde Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sun, 30 May 2010 22:49:21 -0400
Subject: ext4: Patch up how we claim metadata blocks for quota purposes

commit 0637c6f4135f592f094207c7c21e7c0fc5557834 upstream (as of v2.6.33-rc3)

As reported in Kernel Bugzilla #14936, commit d21cd8f triggered a BUG
in the function ext4_da_update_reserve_space() found in
fs/ext4/inode.c. The root cause of this BUG() was that
ext4_calc_metadata_amount() can severely over-estimate how many
metadata blocks will be needed, especially when using direct
block-mapped files.

In addition, it can also badly *under*-estimate how much space is
needed, since ext4_calc_metadata_amount() assumes that the blocks are
contiguous, and this is not always true. If the application is
writing blocks to a sparse file, the number of metadata blocks
necessary can be severely underestimated by the functions
ext4_da_reserve_space(), ext4_da_update_reserve_space() and
ext4_da_release_space(). This was the cause of the dq_claim_space
reports found on kerneloops.org.
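
To get a feel for the size of the mismatch, here is a toy userspace
model (not the ext4 code; the 4 KiB block size, 4-byte block pointers
and the per-block worst case of three new index blocks are
illustrative assumptions):

  /* Toy illustration only -- not fs/ext4 code. */
  #include <stdio.h>

  #define ADDR_PER_BLOCK 1024  /* assumed: 4096-byte block / 4-byte pointer */

  /* rough estimate when the data blocks are assumed contiguous */
  static unsigned long meta_if_contiguous(unsigned long blocks)
  {
          /* one indirect block per ADDR_PER_BLOCK data blocks,
           * plus a couple of double/triple indirect blocks */
          return (blocks + ADDR_PER_BLOCK - 1) / ADDR_PER_BLOCK + 2;
  }

  /* worst case for scattered writes to a sparse block-mapped file:
   * every data block may need a fresh indirect chain */
  static unsigned long meta_worst_case_sparse(unsigned long blocks)
  {
          return 3 * blocks;
  }

  int main(void)
  {
          unsigned long n = 1000;

          printf("contiguous estimate: %lu metadata blocks\n",
                 meta_if_contiguous(n));
          printf("sparse worst case:   %lu metadata blocks\n",
                 meta_worst_case_sparse(n));
          return 0;
  }

For 1000 data blocks the two numbers come out as 3 and 3000, which is
roughly the gap the reservation code has to cope with.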

Unfortunately, doing this right means that we need to massively
over-estimate the amount of free space needed. So in some cases we
may need to force the inode to be written to disk asynchronously to
avoid spurious quota failures.
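
As a reading aid for the hunks below, here is a minimal userspace
sketch of the accounting rule they describe ("We can release all of
the reserved metadata blocks only when we have written all of the
delayed allocation blocks"): per-inode metadata reservations are only
handed back once i_reserved_data_blocks drops to zero. The field
names mirror the diff, but this is a simplified model, not kernel
code:

  /* Simplified model only -- not fs/ext4 code. */
  #include <stdio.h>

  struct inode_resv {
          long reserved_data_blocks;   /* delayed data blocks not yet written */
          long reserved_meta_blocks;   /* metadata reservation (an over-estimate) */
          long allocated_meta_blocks;  /* metadata blocks actually allocated */
  };

  /* models the writeout path: "used" delayed data blocks were written;
   * returns how much metadata reservation can go back to quota */
  static long update_reserve_space(struct inode_resv *r, long used)
  {
          long mdb_free = 0;

          r->reserved_data_blocks -= used;
          r->reserved_meta_blocks -= r->allocated_meta_blocks;
          r->allocated_meta_blocks = 0;

          if (r->reserved_data_blocks == 0) {
                  /* all delayed data is on disk, so the remaining
                   * (over-estimated) metadata reservation is released */
                  mdb_free = r->reserved_meta_blocks;
                  r->reserved_meta_blocks = 0;
          }
          return mdb_free;
  }

  int main(void)
  {
          /* 24 reserved metadata blocks is a deliberate over-estimate */
          struct inode_resv r = { 8, 24, 2 };

          printf("released after partial writeout: %ld\n",
                 update_reserve_space(&r, 4));
          printf("released after final writeout:   %ld\n",
                 update_reserve_space(&r, 4));
          return 0;
  }

Forcing the inode out (the write_inode_now() call added below) drives
i_reserved_data_blocks to zero, so the over-reserved metadata is
released and a retried quota reservation has a better chance of
succeeding.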

http://bugzilla.kernel.org/show_bug.cgi?id=14936

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/ext4/inode.c | 153 ++++++++++++++++++++++++++++++--------------------------
 1 file changed, 82 insertions(+), 71 deletions(-)

--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1085,43 +1085,47 @@ static int ext4_calc_metadata_amount(str
 return ext4_indirect_calc_metadata_amount(inode, blocks);
 }
 
+/*
+ * Called with i_data_sem down, which is important since we can call
+ * ext4_discard_preallocations() from here.
+ */
 static void ext4_da_update_reserve_space(struct inode *inode, int used)
 {
 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- int total, mdb, mdb_free, mdb_claim = 0;
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ int mdb_free = 0;
 
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- /* recalculate the number of metablocks still need to be reserved */
- total = EXT4_I(inode)->i_reserved_data_blocks - used;
- mdb = ext4_calc_metadata_amount(inode, total);
-
- /* figure out how many metablocks to release */
- BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
- mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
-
- if (mdb_free) {
- /* Account for allocated meta_blocks */
- mdb_claim = EXT4_I(inode)->i_allocated_meta_blocks;
- BUG_ON(mdb_free < mdb_claim);
- mdb_free -= mdb_claim;
+ spin_lock(&ei->i_block_reservation_lock);
+ if (unlikely(used > ei->i_reserved_data_blocks)) {
+ ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
+ "with only %d reserved data blocks\n",
+ __func__, inode->i_ino, used,
+ ei->i_reserved_data_blocks);
+ WARN_ON(1);
+ used = ei->i_reserved_data_blocks;
+ }
+
+ /* Update per-inode reservations */
+ ei->i_reserved_data_blocks -= used;
+ used += ei->i_allocated_meta_blocks;
+ ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
+ ei->i_allocated_meta_blocks = 0;
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
 
- /* update fs dirty blocks counter */
+ if (ei->i_reserved_data_blocks == 0) {
+ /*
+ * We can release all of the reserved metadata blocks
+ * only when we have written all of the delayed
+ * allocation blocks.
+ */
+ mdb_free = ei->i_allocated_meta_blocks;
 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
- EXT4_I(inode)->i_allocated_meta_blocks = 0;
- EXT4_I(inode)->i_reserved_meta_blocks = mdb;
+ ei->i_allocated_meta_blocks = 0;
 }
-
- /* update per-inode reservations */
- BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
- EXT4_I(inode)->i_reserved_data_blocks -= used;
- percpu_counter_sub(&sbi->s_dirtyblocks_counter, used + mdb_claim);
 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
- vfs_dq_claim_block(inode, used + mdb_claim);
-
- /*
- * free those over-booking quota for metadata blocks
- */
+ /* Update quota subsystem */
+ vfs_dq_claim_block(inode, used);
 if (mdb_free)
 vfs_dq_release_reservation_block(inode, mdb_free);
 
@@ -1130,7 +1134,8 @@ static void ext4_da_update_reserve_space
 * there aren't any writers on the inode, we can discard the
 * inode's preallocations.
 */
- if (!total && (atomic_read(&inode->i_writecount) == 0))
+ if ((ei->i_reserved_data_blocks == 0) &&
+ (atomic_read(&inode->i_writecount) == 0))
 ext4_discard_preallocations(inode);
 }
 
@@ -1843,7 +1848,8 @@ static int ext4_da_reserve_space(struct
 {
 int retries = 0;
 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- unsigned long md_needed, mdblocks, total = 0;
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ unsigned long md_needed, md_reserved, total = 0;
 
 /*
 * recalculate the amount of metadata blocks to reserve
@@ -1851,35 +1857,44 @@ static int ext4_da_reserve_space(struct
 * worse case is one extent per block
 */
 repeat:
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
- mdblocks = ext4_calc_metadata_amount(inode, total);
- BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks);
-
- md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
+ spin_lock(&ei->i_block_reservation_lock);
+ md_reserved = ei->i_reserved_meta_blocks;
+ md_needed = ext4_calc_metadata_amount(inode, nrblocks);
 total = md_needed + nrblocks;
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ spin_unlock(&ei->i_block_reservation_lock);
 
 /*
 * Make quota reservation here to prevent quota overflow
 * later. Real quota accounting is done at pages writeout
 * time.
 */
- if (vfs_dq_reserve_block(inode, total))
+ if (vfs_dq_reserve_block(inode, total)) {
+ /*
+ * We tend to badly over-estimate the amount of
+ * metadata blocks which are needed, so if we have
+ * reserved any metadata blocks, try to force out the
+ * inode and see if we have any better luck.
+ */
+ if (md_reserved && retries++ <= 3)
+ goto retry;
 return -EDQUOT;
+ }
 
 if (ext4_claim_free_blocks(sbi, total)) {
 vfs_dq_release_reservation_block(inode, total);
 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
+ retry:
+ if (md_reserved)
+ write_inode_now(inode, (retries == 3));
 yield();
 goto repeat;
 }
 return -ENOSPC;
 }
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
- EXT4_I(inode)->i_reserved_meta_blocks += md_needed;
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ spin_lock(&ei->i_block_reservation_lock);
+ ei->i_reserved_data_blocks += nrblocks;
+ ei->i_reserved_meta_blocks += md_needed;
+ spin_unlock(&ei->i_block_reservation_lock);
 
 return 0; /* success */
 }
@@ -1887,49 +1902,45 @@ repeat:
 static void ext4_da_release_space(struct inode *inode, int to_free)
 {
 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- int total, mdb, mdb_free, release;
+ struct ext4_inode_info *ei = EXT4_I(inode);
 
 if (!to_free)
 return; /* Nothing to release, exit */
 
 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 
- if (!EXT4_I(inode)->i_reserved_data_blocks) {
+ if (unlikely(to_free > ei->i_reserved_data_blocks)) {
 /*
- * if there is no reserved blocks, but we try to free some
- * then the counter is messed up somewhere.
- * but since this function is called from invalidate
- * page, it's harmless to return without any action
+ * if there aren't enough reserved blocks, then the
+ * counter is messed up somewhere. Since this
+ * function is called from invalidate page, it's
+ * harmless to return without any action.
 */
- printk(KERN_INFO "ext4 delalloc try to release %d reserved "
- "blocks for inode %lu, but there is no reserved "
- "data blocks\n", to_free, inode->i_ino);
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
- return;
+ ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
+ "ino %lu, to_free %d with only %d reserved "
+ "data blocks\n", inode->i_ino, to_free,
+ ei->i_reserved_data_blocks);
+ WARN_ON(1);
+ to_free = ei->i_reserved_data_blocks;
 }
+ ei->i_reserved_data_blocks -= to_free;
 
- /* recalculate the number of metablocks still need to be reserved */
- total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
- mdb = ext4_calc_metadata_amount(inode, total);
-
- /* figure out how many metablocks to release */
- BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
- mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
-
- release = to_free + mdb_free;
-
- /* update fs dirty blocks counter for truncate case */
- percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
+ if (ei->i_reserved_data_blocks == 0) {
+ /*
+ * We can release all of the reserved metadata blocks
+ * only when we have written all of the delayed
+ * allocation blocks.
+ */
+ to_free += ei->i_allocated_meta_blocks;
+ ei->i_allocated_meta_blocks = 0;
+ }
 
- /* update per-inode reservations */
- BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
- EXT4_I(inode)->i_reserved_data_blocks -= to_free;
+ /* update fs dirty blocks counter */
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
 
- BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
- EXT4_I(inode)->i_reserved_meta_blocks = mdb;
 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
- vfs_dq_release_reservation_block(inode, release);
+ vfs_dq_release_reservation_block(inode, to_free);
 }
 
 static void ext4_da_page_release_reservation(struct page *page,