]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - releases/2.6.32.17/0009-ext4-Calculate-metadata-requirements-more-accurately.patch
4.9-stable patches
[thirdparty/kernel/stable-queue.git] / releases / 2.6.32.17 / 0009-ext4-Calculate-metadata-requirements-more-accurately.patch
1 From 665d82f8d039371ba402227e99d3b95078c97fb9 Mon Sep 17 00:00:00 2001
2 From: Theodore Ts'o <tytso@mit.edu>
3 Date: Sun, 30 May 2010 22:49:23 -0400
4 Subject: ext4: Calculate metadata requirements more accurately
5
6 commit 9d0be50230b333005635967f7ecd4897dbfd181b upstream (as of v2.6.33-rc3)
7
8 In the past, ext4_calc_metadata_amount(), and its sub-functions
9 ext4_ext_calc_metadata_amount() and ext4_indirect_calc_metadata_amount()
10 badly over-estimated the number of metadata blocks that might be
11 required for delayed allocation blocks. This didn't matter as much
12 when functions which managed the reserved metadata blocks were more
13 aggressive about dropping reserved metadata blocks as delayed
14 allocation blocks were written, but unfortunately they were too
15 aggressive. This was fixed in commit 0637c6f, but as a result the
16 over-estimation by ext4_calc_metadata_amount() would lead to reserving
17 2-3 times the number of pending delayed allocation blocks as
18 potentially required metadata blocks. So if there is 1 megabyte of
19 blocks which have not yet been allocated, up to 3 megabytes of
20 space would get reserved out of the user's quota and from the file
21 system free space pool until all of the inode's data blocks have been
22 allocated.
23
24 This commit addresses this problem by much more accurately estimating
25 the number of metadata blocks that will be required. It will still
26 somewhat over-estimate the number of blocks needed, since it must make
27 a worst case estimate not knowing which physical blocks will be
28 needed, but it is much more accurate than before.
29
30 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
31 Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
32 ---
33 fs/ext4/ext4.h | 2 +
34 fs/ext4/ext4_extents.h | 3 +-
35 fs/ext4/extents.c | 49 ++++++++++++++++++++++++-------------
36 fs/ext4/inode.c | 62 +++++++++++++++++++++++++++--------------------
37 fs/ext4/super.c | 1 +
38 5 files changed, 73 insertions(+), 44 deletions(-)
39
40 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
41 index 4a825c1..23bfbbc 100644
42 --- a/fs/ext4/ext4.h
43 +++ b/fs/ext4/ext4.h
44 @@ -693,6 +693,8 @@ struct ext4_inode_info {
45 unsigned int i_reserved_meta_blocks;
46 unsigned int i_allocated_meta_blocks;
47 unsigned short i_delalloc_reserved_flag;
48 + sector_t i_da_metadata_calc_last_lblock;
49 + int i_da_metadata_calc_len;
50
51 /* on-disk additional length */
52 __u16 i_extra_isize;
53 diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
54 index 2ca6864..bdb6ce7 100644
55 --- a/fs/ext4/ext4_extents.h
56 +++ b/fs/ext4/ext4_extents.h
57 @@ -225,7 +225,8 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext)
58 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
59 }
60
61 -extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
62 +extern int ext4_ext_calc_metadata_amount(struct inode *inode,
63 + sector_t lblocks);
64 extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
65 extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
66 extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
67 diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
68 index b14fb6d..5f03f9f 100644
69 --- a/fs/ext4/extents.c
70 +++ b/fs/ext4/extents.c
71 @@ -296,29 +296,44 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
72 * to allocate @blocks
73 * Worse case is one block per extent
74 */
75 -int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks)
76 +int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
77 {
78 - int lcap, icap, rcap, leafs, idxs, num;
79 - int newextents = blocks;
80 -
81 - rcap = ext4_ext_space_root_idx(inode, 0);
82 - lcap = ext4_ext_space_block(inode, 0);
83 - icap = ext4_ext_space_block_idx(inode, 0);
84 + struct ext4_inode_info *ei = EXT4_I(inode);
85 + int idxs, num = 0;
86
87 - /* number of new leaf blocks needed */
88 - num = leafs = (newextents + lcap - 1) / lcap;
89 + idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
90 + / sizeof(struct ext4_extent_idx));
91
92 /*
93 - * Worse case, we need separate index block(s)
94 - * to link all new leaf blocks
95 + * If the new delayed allocation block is contiguous with the
96 + * previous da block, it can share index blocks with the
97 + * previous block, so we only need to allocate a new index
98 + * block every idxs leaf blocks. At idxs**2 blocks, we need
99 + * an additional index block, and at idxs**3 blocks, yet
100 + * another index block.
101 */
102 - idxs = (leafs + icap - 1) / icap;
103 - do {
104 - num += idxs;
105 - idxs = (idxs + icap - 1) / icap;
106 - } while (idxs > rcap);
107 + if (ei->i_da_metadata_calc_len &&
108 + ei->i_da_metadata_calc_last_lblock+1 == lblock) {
109 + if ((ei->i_da_metadata_calc_len % idxs) == 0)
110 + num++;
111 + if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
112 + num++;
113 + if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
114 + num++;
115 + ei->i_da_metadata_calc_len = 0;
116 + } else
117 + ei->i_da_metadata_calc_len++;
118 + ei->i_da_metadata_calc_last_lblock++;
119 + return num;
120 + }
121
122 - return num;
123 + /*
124 + * In the worst case we need a new set of index blocks at
125 + * every level of the inode's extent tree.
126 + */
127 + ei->i_da_metadata_calc_len = 1;
128 + ei->i_da_metadata_calc_last_lblock = lblock;
129 + return ext_depth(inode) + 1;
130 }
131
132 static int
133 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
134 index 533bb84..2e3f422 100644
135 --- a/fs/ext4/inode.c
136 +++ b/fs/ext4/inode.c
137 @@ -1051,38 +1051,44 @@ qsize_t *ext4_get_reserved_space(struct inode *inode)
138 return &EXT4_I(inode)->i_reserved_quota;
139 }
140 #endif
141 +
142 /*
143 * Calculate the number of metadata blocks need to reserve
144 - * to allocate @blocks for non extent file based file
145 + * to allocate a new block at @lblock for a non-extent-based file
146 */
147 -static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
148 +static int ext4_indirect_calc_metadata_amount(struct inode *inode,
149 + sector_t lblock)
150 {
151 - int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
152 - int ind_blks, dind_blks, tind_blks;
153 -
154 - /* number of new indirect blocks needed */
155 - ind_blks = (blocks + icap - 1) / icap;
156 + struct ext4_inode_info *ei = EXT4_I(inode);
157 + int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1;
158 + int blk_bits;
159
160 - dind_blks = (ind_blks + icap - 1) / icap;
161 + if (lblock < EXT4_NDIR_BLOCKS)
162 + return 0;
163
164 - tind_blks = 1;
165 + lblock -= EXT4_NDIR_BLOCKS;
166
167 - return ind_blks + dind_blks + tind_blks;
168 + if (ei->i_da_metadata_calc_len &&
169 + (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
170 + ei->i_da_metadata_calc_len++;
171 + return 0;
172 + }
173 + ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
174 + ei->i_da_metadata_calc_len = 1;
175 + blk_bits = roundup_pow_of_two(lblock + 1);
176 + return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
177 }
178
179 /*
180 * Calculate the number of metadata blocks need to reserve
181 - * to allocate given number of blocks
182 + * to allocate a block located at @lblock
183 */
184 -static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
185 +static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
186 {
187 - if (!blocks)
188 - return 0;
189 -
190 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
191 - return ext4_ext_calc_metadata_amount(inode, blocks);
192 + return ext4_ext_calc_metadata_amount(inode, lblock);
193
194 - return ext4_indirect_calc_metadata_amount(inode, blocks);
195 + return ext4_indirect_calc_metadata_amount(inode, lblock);
196 }
197
198 /*
199 @@ -1120,6 +1126,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
200 */
201 mdb_free = ei->i_reserved_meta_blocks;
202 ei->i_reserved_meta_blocks = 0;
203 + ei->i_da_metadata_calc_len = 0;
204 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
205 }
206 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
207 @@ -1844,12 +1851,15 @@ static int ext4_journalled_write_end(struct file *file,
208 return ret ? ret : copied;
209 }
210
211 -static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
212 +/*
213 + * Reserve a single block located at lblock
214 + */
215 +static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
216 {
217 int retries = 0;
218 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
219 struct ext4_inode_info *ei = EXT4_I(inode);
220 - unsigned long md_needed, md_reserved, total = 0;
221 + unsigned long md_needed, md_reserved;
222
223 /*
224 * recalculate the amount of metadata blocks to reserve
225 @@ -1859,8 +1869,7 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
226 repeat:
227 spin_lock(&ei->i_block_reservation_lock);
228 md_reserved = ei->i_reserved_meta_blocks;
229 - md_needed = ext4_calc_metadata_amount(inode, nrblocks);
230 - total = md_needed + nrblocks;
231 + md_needed = ext4_calc_metadata_amount(inode, lblock);
232 spin_unlock(&ei->i_block_reservation_lock);
233
234 /*
235 @@ -1868,7 +1877,7 @@ repeat:
236 * later. Real quota accounting is done at pages writeout
237 * time.
238 */
239 - if (vfs_dq_reserve_block(inode, total)) {
240 + if (vfs_dq_reserve_block(inode, md_needed + 1)) {
241 /*
242 * We tend to badly over-estimate the amount of
243 * metadata blocks which are needed, so if we have
244 @@ -1880,8 +1889,8 @@ repeat:
245 return -EDQUOT;
246 }
247
248 - if (ext4_claim_free_blocks(sbi, total)) {
249 - vfs_dq_release_reservation_block(inode, total);
250 + if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
251 + vfs_dq_release_reservation_block(inode, md_needed + 1);
252 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
253 retry:
254 if (md_reserved)
255 @@ -1892,7 +1901,7 @@ repeat:
256 return -ENOSPC;
257 }
258 spin_lock(&ei->i_block_reservation_lock);
259 - ei->i_reserved_data_blocks += nrblocks;
260 + ei->i_reserved_data_blocks++;
261 ei->i_reserved_meta_blocks += md_needed;
262 spin_unlock(&ei->i_block_reservation_lock);
263
264 @@ -1933,6 +1942,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
265 */
266 to_free += ei->i_reserved_meta_blocks;
267 ei->i_reserved_meta_blocks = 0;
268 + ei->i_da_metadata_calc_len = 0;
269 }
270
271 /* update fs dirty blocks counter */
272 @@ -2546,7 +2556,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
273 * XXX: __block_prepare_write() unmaps passed block,
274 * is it OK?
275 */
276 - ret = ext4_da_reserve_space(inode, 1);
277 + ret = ext4_da_reserve_space(inode, iblock);
278 if (ret)
279 /* not enough space to reserve */
280 return ret;
281 diff --git a/fs/ext4/super.c b/fs/ext4/super.c
282 index 92943f2..252f30b 100644
283 --- a/fs/ext4/super.c
284 +++ b/fs/ext4/super.c
285 @@ -702,6 +702,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
286 ei->i_reserved_data_blocks = 0;
287 ei->i_reserved_meta_blocks = 0;
288 ei->i_allocated_meta_blocks = 0;
289 + ei->i_da_metadata_calc_len = 0;
290 ei->i_delalloc_reserved_flag = 0;
291 spin_lock_init(&(ei->i_block_reservation_lock));
292 #ifdef CONFIG_QUOTA
293 --
294 1.7.1
295