]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - releases/2.6.27.47/ext4-make-sure-all-the-block-allocation-paths-reserve-blocks.patch
4.14-stable patches
[thirdparty/kernel/stable-queue.git] / releases / 2.6.27.47 / ext4-make-sure-all-the-block-allocation-paths-reserve-blocks.patch
1 From tytso@mit.edu Mon Apr 19 10:20:41 2010
2 From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
3 Date: Mon, 15 Mar 2010 20:25:57 -0400
4 Subject: ext4: Make sure all the block allocation paths reserve blocks
5 To: stable@kernel.org
6 Cc: Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>, "Jayson R. King" <dev@jaysonking.com>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
7 Message-ID: <1268699165-17461-4-git-send-email-tytso@mit.edu>
8
9
10 From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
11
12 commit a30d542a0035b886ffaafd0057ced0a2b28c3a4f upstream.
13
14 With delayed allocation we need to make sure blocks are reserved before
15 we attempt to allocate them. Otherwise we get block allocation failure
16 (ENOSPC) during writepages which cannot be handled. This would mean
17 silent data loss (We do a printk stating data will be lost). This patch
18 updates the DIO and fallocate code path to do block reservation before
19 block allocation. This is needed to make sure parallel DIO and fallocate
20 requests don't take blocks out of the delayed reserve space.
21
22 When the free block count goes below a threshold we switch to a slow path
23 which looks at other CPUs' accumulated percpu counter values.
24
25 Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
26 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
27 Signed-off-by: Jayson R. King <dev@jaysonking.com>
28 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
29 Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
30
31 ---
32 fs/ext4/balloc.c | 58 +++++++++++++++++++++++++++++++++++++++---------------
33 fs/ext4/ext4.h | 13 ++++++++++++
34 fs/ext4/inode.c | 5 ----
35 fs/ext4/mballoc.c | 23 ++++++++++++---------
36 4 files changed, 69 insertions(+), 30 deletions(-)
37
38 --- a/fs/ext4/balloc.c
39 +++ b/fs/ext4/balloc.c
40 @@ -1754,6 +1754,32 @@ out:
41 return ret;
42 }
43
44 +int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
45 + ext4_fsblk_t nblocks)
46 +{
47 + s64 free_blocks;
48 + ext4_fsblk_t root_blocks = 0;
49 + struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
50 +
51 + free_blocks = percpu_counter_read(fbc);
52 +
53 + if (!capable(CAP_SYS_RESOURCE) &&
54 + sbi->s_resuid != current->fsuid &&
55 + (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
56 + root_blocks = ext4_r_blocks_count(sbi->s_es);
57 +
58 + if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK)
59 + free_blocks = percpu_counter_sum(&sbi->s_freeblocks_counter);
60 +
61 + if (free_blocks < (root_blocks + nblocks))
62 + /* we don't have free space */
63 + return -ENOSPC;
64 +
65 + /* reduce fs free blocks counter */
66 + percpu_counter_sub(fbc, nblocks);
67 + return 0;
68 +}
69 +
70 /**
71 * ext4_has_free_blocks()
72 * @sbi: in-core super block structure.
73 @@ -1775,18 +1801,17 @@ ext4_fsblk_t ext4_has_free_blocks(struct
74 sbi->s_resuid != current->fsuid &&
75 (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
76 root_blocks = ext4_r_blocks_count(sbi->s_es);
77 -#ifdef CONFIG_SMP
78 - if (free_blocks - root_blocks < FBC_BATCH)
79 - free_blocks =
80 - percpu_counter_sum(&sbi->s_freeblocks_counter);
81 -#endif
82 +
83 + if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK)
84 + free_blocks = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
85 +
86 if (free_blocks <= root_blocks)
87 /* we don't have free space */
88 return 0;
89 if (free_blocks - root_blocks < nblocks)
90 return free_blocks - root_blocks;
91 return nblocks;
92 - }
93 +}
94
95
96 /**
97 @@ -1865,14 +1890,11 @@ ext4_fsblk_t ext4_old_new_blocks(handle_
98 /*
99 * With delalloc we already reserved the blocks
100 */
101 - *count = ext4_has_free_blocks(sbi, *count);
102 - }
103 - if (*count == 0) {
104 - *errp = -ENOSPC;
105 - return 0; /*return with ENOSPC error */
106 + if (ext4_claim_free_blocks(sbi, *count)) {
107 + *errp = -ENOSPC;
108 + return 0; /*return with ENOSPC error */
109 + }
110 }
111 - num = *count;
112 -
113 /*
114 * Check quota for allocation of this block.
115 */
116 @@ -2067,9 +2089,13 @@ allocated:
117 le16_add_cpu(&gdp->bg_free_blocks_count, -num);
118 gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
119 spin_unlock(sb_bgl_lock(sbi, group_no));
120 - if (!EXT4_I(inode)->i_delalloc_reserved_flag)
121 - percpu_counter_sub(&sbi->s_freeblocks_counter, num);
122 -
123 + if (!EXT4_I(inode)->i_delalloc_reserved_flag && (*count != num)) {
124 + /*
125 + * we allocated less blocks than we
126 + * claimed. Add the difference back.
127 + */
128 + percpu_counter_add(&sbi->s_freeblocks_counter, *count - num);
129 + }
130 if (sbi->s_log_groups_per_flex) {
131 ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
132 spin_lock(sb_bgl_lock(sbi, flex_group));
133 --- a/fs/ext4/ext4.h
134 +++ b/fs/ext4/ext4.h
135 @@ -1015,6 +1015,8 @@ extern ext4_fsblk_t ext4_new_blocks(hand
136 unsigned long *count, int *errp);
137 extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
138 ext4_fsblk_t goal, unsigned long *count, int *errp);
139 +extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
140 + ext4_fsblk_t nblocks);
141 extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
142 ext4_fsblk_t nblocks);
143 extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
144 @@ -1245,6 +1247,17 @@ do { \
145 __ext4_std_error((sb), __func__, (errno)); \
146 } while (0)
147
148 +#ifdef CONFIG_SMP
149 +/* Each CPU can accumulate FBC_BATCH blocks in their local
150 + * counters. So we need to make sure we have free blocks more
151 + * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times.
152 + */
153 +#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids))
154 +#else
155 +#define EXT4_FREEBLOCKS_WATERMARK 0
156 +#endif
157 +
158 +
159 /*
160 * Inodes and files operations
161 */
162 --- a/fs/ext4/inode.c
163 +++ b/fs/ext4/inode.c
164 @@ -1564,13 +1564,10 @@ static int ext4_da_reserve_space(struct
165 md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
166 total = md_needed + nrblocks;
167
168 - if (ext4_has_free_blocks(sbi, total) < total) {
169 + if (ext4_claim_free_blocks(sbi, total)) {
170 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
171 return -ENOSPC;
172 }
173 - /* reduce fs free blocks counter */
174 - percpu_counter_sub(&sbi->s_freeblocks_counter, total);
175 -
176 EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
177 EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
178
179 --- a/fs/ext4/mballoc.c
180 +++ b/fs/ext4/mballoc.c
181 @@ -3194,9 +3194,15 @@ ext4_mb_mark_diskspace_used(struct ext4_
182 * at write_begin() time for delayed allocation
183 * do not double accounting
184 */
185 - if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
186 - percpu_counter_sub(&sbi->s_freeblocks_counter,
187 - ac->ac_b_ex.fe_len);
188 + if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED) &&
189 + ac->ac_o_ex.fe_len != ac->ac_b_ex.fe_len) {
190 + /*
191 + * we allocated less blocks than we calimed
192 + * Add the difference back
193 + */
194 + percpu_counter_add(&sbi->s_freeblocks_counter,
195 + ac->ac_o_ex.fe_len - ac->ac_b_ex.fe_len);
196 + }
197
198 if (sbi->s_log_groups_per_flex) {
199 ext4_group_t flex_group = ext4_flex_group(sbi,
200 @@ -4649,14 +4655,11 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
201 /*
202 * With delalloc we already reserved the blocks
203 */
204 - ar->len = ext4_has_free_blocks(sbi, ar->len);
205 - }
206 -
207 - if (ar->len == 0) {
208 - *errp = -ENOSPC;
209 - return 0;
210 + if (ext4_claim_free_blocks(sbi, ar->len)) {
211 + *errp = -ENOSPC;
212 + return 0;
213 + }
214 }
215 -
216 while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
217 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
218 ar->len--;