]>
Commit | Line | Data |
---|---|---|
00e5a55c BS |
1 | From: Joel Becker <joel.becker@oracle.com> |
2 | Subject: ocfs2: Limit inode allocation to 32bits. | |
3 | Patch-mainline: 2.6.28? | |
4 | References: FATE302877 | |
5 | ||
6 | ocfs2 inode numbers are block numbers. For any filesystem with less | |
7 | than 2^32 blocks, this is not a problem. However, when ocfs2 starts | |
8 | using JBD2, it will be able to support filesystems with more than 2^32 | |
9 | blocks. This would result in inode numbers higher than 2^32. | |
10 | ||
11 | The problem is that stat(2) can't handle those numbers on 32bit | |
12 | machines. The simple solution is to have ocfs2 allocate all inodes | |
13 | below that boundary. | |
14 | ||
15 | The suballoc code is changed to honor an optional block limit. Only the | |
16 | inode suballocator sets that limit - all other allocations stay unlimited. | |
17 | ||
18 | The biggest trick is to grow the inode suballocator beneath that limit. | |
19 | There's no point in allocating block groups that are above the limit, | |
20 | then rejecting their elements later on. We want to prevent the inode | |
21 | allocator from ever having block groups above the limit. This involves | |
22 | a little gyration with the local alloc code. If the local alloc window | |
23 | is above the limit, it signals the caller to try the global bitmap but | |
24 | does not disable the local alloc file (which can be used for other | |
25 | allocations). | |
26 | ||
27 | [ Minor cleanup - removed an ML_NOTICE comment. --Mark ] | |
28 | ||
29 | Signed-off-by: Joel Becker <joel.becker@oracle.com> | |
30 | Signed-off-by: Mark Fasheh <mfasheh@suse.com> | |
31 | --- | |
32 | fs/ocfs2/localalloc.c | 55 +++++++++++++++++++++++++++++++ | |
33 | fs/ocfs2/suballoc.c | 86 ++++++++++++++++++++++++++++++++++++++++---------- | |
34 | fs/ocfs2/suballoc.h | 11 ++++-- | |
35 | 3 files changed, 132 insertions(+), 20 deletions(-) | |
36 | ||
37 | --- a/fs/ocfs2/localalloc.c | |
38 | +++ b/fs/ocfs2/localalloc.c | |
39 | @@ -453,6 +453,46 @@ out: | |
40 | return status; | |
41 | } | |
42 | ||
43 | +/* Check to see if the local alloc window is within ac->ac_max_block */ | |
44 | +static int ocfs2_local_alloc_in_range(struct inode *inode, | |
45 | + struct ocfs2_alloc_context *ac, | |
46 | + u32 bits_wanted) | |
47 | +{ | |
48 | + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | |
49 | + struct ocfs2_dinode *alloc; | |
50 | + struct ocfs2_local_alloc *la; | |
51 | + int start; | |
52 | + u64 block_off; | |
53 | + | |
54 | + if (!ac->ac_max_block) | |
55 | + return 1; | |
56 | + | |
57 | + alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; | |
58 | + la = OCFS2_LOCAL_ALLOC(alloc); | |
59 | + | |
60 | + start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted); | |
61 | + if (start == -1) { | |
62 | + mlog_errno(-ENOSPC); | |
63 | + return 0; | |
64 | + } | |
65 | + | |
66 | + /* | |
67 | + * Converting (bm_off + start + bits_wanted) to blocks gives us | |
68 | + * the blkno just past our actual allocation. This is perfect | |
69 | + * to compare with ac_max_block. | |
70 | + */ | |
71 | + block_off = ocfs2_clusters_to_blocks(inode->i_sb, | |
72 | + le32_to_cpu(la->la_bm_off) + | |
73 | + start + bits_wanted); | |
74 | + mlog(0, "Checking %llu against %llu\n", | |
75 | + (unsigned long long)block_off, | |
76 | + (unsigned long long)ac->ac_max_block); | |
77 | + if (block_off > ac->ac_max_block) | |
78 | + return 0; | |
79 | + | |
80 | + return 1; | |
81 | +} | |
82 | + | |
83 | /* | |
84 | * make sure we've got at least bitswanted contiguous bits in the | |
85 | * local alloc. You lose them when you drop i_mutex. | |
86 | @@ -524,6 +564,21 @@ int ocfs2_reserve_local_alloc_bits(struc | |
87 | } | |
88 | } | |
89 | ||
90 | + if (ac->ac_max_block) | |
91 | + mlog(0, "Calling in_range for max block %llu\n", | |
92 | + (unsigned long long)ac->ac_max_block); | |
93 | + | |
94 | + if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac, | |
95 | + bits_wanted)) { | |
96 | + /* | |
97 | + * The window is outside ac->ac_max_block. | |
98 | + * This errno tells the caller to keep localalloc enabled | |
99 | + * but to get the allocation from the main bitmap. | |
100 | + */ | |
101 | + status = -EFBIG; | |
102 | + goto bail; | |
103 | + } | |
104 | + | |
105 | ac->ac_inode = local_alloc_inode; | |
106 | /* We should never use localalloc from another slot */ | |
107 | ac->ac_alloc_slot = osb->slot_num; | |
108 | --- a/fs/ocfs2/suballoc.c | |
109 | +++ b/fs/ocfs2/suballoc.c | |
110 | @@ -62,15 +62,18 @@ static int ocfs2_block_group_fill(handle | |
111 | struct ocfs2_chain_list *cl); | |
112 | static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | |
113 | struct inode *alloc_inode, | |
114 | - struct buffer_head *bh); | |
115 | + struct buffer_head *bh, | |
116 | + u64 max_block); | |
117 | ||
118 | static int ocfs2_cluster_group_search(struct inode *inode, | |
119 | struct buffer_head *group_bh, | |
120 | u32 bits_wanted, u32 min_bits, | |
121 | + u64 max_block, | |
122 | u16 *bit_off, u16 *bits_found); | |
123 | static int ocfs2_block_group_search(struct inode *inode, | |
124 | struct buffer_head *group_bh, | |
125 | u32 bits_wanted, u32 min_bits, | |
126 | + u64 max_block, | |
127 | u16 *bit_off, u16 *bits_found); | |
128 | static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, | |
129 | struct ocfs2_alloc_context *ac, | |
130 | @@ -110,6 +113,9 @@ static inline void ocfs2_block_to_cluste | |
131 | u64 data_blkno, | |
132 | u64 *bg_blkno, | |
133 | u16 *bg_bit_off); | |
134 | +static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb, | |
135 | + u32 bits_wanted, u64 max_block, | |
136 | + struct ocfs2_alloc_context **ac); | |
137 | ||
138 | static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) | |
139 | { | |
140 | @@ -276,7 +282,8 @@ static inline u16 ocfs2_find_smallest_ch | |
141 | */ | |
142 | static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | |
143 | struct inode *alloc_inode, | |
144 | - struct buffer_head *bh) | |
145 | + struct buffer_head *bh, | |
146 | + u64 max_block) | |
147 | { | |
148 | int status, credits; | |
149 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; | |
150 | @@ -294,9 +301,9 @@ static int ocfs2_block_group_alloc(struc | |
151 | mlog_entry_void(); | |
152 | ||
153 | cl = &fe->id2.i_chain; | |
154 | - status = ocfs2_reserve_clusters(osb, | |
155 | - le16_to_cpu(cl->cl_cpg), | |
156 | - &ac); | |
157 | + status = ocfs2_reserve_clusters_with_limit(osb, | |
158 | + le16_to_cpu(cl->cl_cpg), | |
159 | + max_block, &ac); | |
160 | if (status < 0) { | |
161 | if (status != -ENOSPC) | |
162 | mlog_errno(status); | |
163 | @@ -469,7 +476,8 @@ static int ocfs2_reserve_suballoc_bits(s | |
164 | goto bail; | |
165 | } | |
166 | ||
167 | - status = ocfs2_block_group_alloc(osb, alloc_inode, bh); | |
168 | + status = ocfs2_block_group_alloc(osb, alloc_inode, bh, | |
169 | + ac->ac_max_block); | |
170 | if (status < 0) { | |
171 | if (status != -ENOSPC) | |
172 | mlog_errno(status); | |
173 | @@ -591,6 +599,13 @@ int ocfs2_reserve_new_inode(struct ocfs2 | |
174 | (*ac)->ac_group_search = ocfs2_block_group_search; | |
175 | ||
176 | /* | |
177 | + * stat(2) can't handle i_ino > 32bits, so we tell the | |
178 | + * lower levels not to allocate us a block group past that | |
179 | + * limit. | |
180 | + */ | |
181 | + (*ac)->ac_max_block = (u32)~0U; | |
182 | + | |
183 | + /* | |
184 | * slot is set when we successfully steal inode from other nodes. | |
185 | * It is reset in 3 places: | |
186 | * 1. when we flush the truncate log | |
187 | @@ -670,9 +685,9 @@ bail: | |
188 | /* Callers don't need to care which bitmap (local alloc or main) to | |
189 | * use so we figure it out for them, but unfortunately this clutters | |
190 | * things a bit. */ | |
191 | -int ocfs2_reserve_clusters(struct ocfs2_super *osb, | |
192 | - u32 bits_wanted, | |
193 | - struct ocfs2_alloc_context **ac) | |
194 | +static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb, | |
195 | + u32 bits_wanted, u64 max_block, | |
196 | + struct ocfs2_alloc_context **ac) | |
197 | { | |
198 | int status; | |
199 | ||
200 | @@ -686,16 +701,14 @@ int ocfs2_reserve_clusters(struct ocfs2_ | |
201 | } | |
202 | ||
203 | (*ac)->ac_bits_wanted = bits_wanted; | |
204 | + (*ac)->ac_max_block = max_block; | |
205 | ||
206 | status = -ENOSPC; | |
207 | if (ocfs2_alloc_should_use_local(osb, bits_wanted)) { | |
208 | status = ocfs2_reserve_local_alloc_bits(osb, | |
209 | bits_wanted, | |
210 | *ac); | |
211 | - if ((status < 0) && (status != -ENOSPC)) { | |
212 | - mlog_errno(status); | |
213 | - goto bail; | |
214 | - } else if (status == -ENOSPC) { | |
215 | + if (status == -ENOSPC) { | |
216 | /* reserve_local_bits will return enospc with | |
217 | * the local alloc inode still locked, so we | |
218 | * can change this safely here. */ | |
219 | @@ -704,6 +717,14 @@ int ocfs2_reserve_clusters(struct ocfs2_ | |
220 | * can clean up what's left of the local | |
221 | * allocation */ | |
222 | osb->local_alloc_state = OCFS2_LA_DISABLED; | |
223 | + } else if (status == -EFBIG) { | |
224 | + /* The local alloc window is outside ac_max_block. | |
225 | + * Use the main bitmap, but don't disable | |
226 | + * local alloc. */ | |
227 | + status = -ENOSPC; | |
228 | + } else if (status < 0) { | |
229 | + mlog_errno(status); | |
230 | + goto bail; | |
231 | } | |
232 | } | |
233 | ||
234 | @@ -727,6 +748,13 @@ bail: | |
235 | return status; | |
236 | } | |
237 | ||
238 | +int ocfs2_reserve_clusters(struct ocfs2_super *osb, | |
239 | + u32 bits_wanted, | |
240 | + struct ocfs2_alloc_context **ac) | |
241 | +{ | |
242 | + return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0, ac); | |
243 | +} | |
244 | + | |
245 | /* | |
246 | * More or less lifted from ext3. I'll leave their description below: | |
247 | * | |
248 | @@ -1009,10 +1037,12 @@ static inline int ocfs2_block_group_reas | |
249 | static int ocfs2_cluster_group_search(struct inode *inode, | |
250 | struct buffer_head *group_bh, | |
251 | u32 bits_wanted, u32 min_bits, | |
252 | + u64 max_block, | |
253 | u16 *bit_off, u16 *bits_found) | |
254 | { | |
255 | int search = -ENOSPC; | |
256 | int ret; | |
257 | + u64 blkoff; | |
258 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; | |
259 | u16 tmp_off, tmp_found; | |
260 | unsigned int max_bits, gd_cluster_off; | |
261 | @@ -1046,6 +1076,17 @@ static int ocfs2_cluster_group_search(st | |
262 | if (ret) | |
263 | return ret; | |
264 | ||
265 | + if (max_block) { | |
266 | + blkoff = ocfs2_clusters_to_blocks(inode->i_sb, | |
267 | + gd_cluster_off + | |
268 | + tmp_off + tmp_found); | |
269 | + mlog(0, "Checking %llu against %llu\n", | |
270 | + (unsigned long long)blkoff, | |
271 | + (unsigned long long)max_block); | |
272 | + if (blkoff > max_block) | |
273 | + return -ENOSPC; | |
274 | + } | |
275 | + | |
276 | /* ocfs2_block_group_find_clear_bits() might | |
277 | * return success, but we still want to return | |
278 | * -ENOSPC unless it found the minimum number | |
279 | @@ -1063,19 +1104,31 @@ static int ocfs2_cluster_group_search(st | |
280 | static int ocfs2_block_group_search(struct inode *inode, | |
281 | struct buffer_head *group_bh, | |
282 | u32 bits_wanted, u32 min_bits, | |
283 | + u64 max_block, | |
284 | u16 *bit_off, u16 *bits_found) | |
285 | { | |
286 | int ret = -ENOSPC; | |
287 | + u64 blkoff; | |
288 | struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; | |
289 | ||
290 | BUG_ON(min_bits != 1); | |
291 | BUG_ON(ocfs2_is_cluster_bitmap(inode)); | |
292 | ||
293 | - if (bg->bg_free_bits_count) | |
294 | + if (bg->bg_free_bits_count) { | |
295 | ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), | |
296 | group_bh, bits_wanted, | |
297 | le16_to_cpu(bg->bg_bits), | |
298 | bit_off, bits_found); | |
299 | + if (!ret && max_block) { | |
300 | + blkoff = le64_to_cpu(bg->bg_blkno) + *bit_off + | |
301 | + *bits_found; | |
302 | + mlog(0, "Checking %llu against %llu\n", | |
303 | + (unsigned long long)blkoff, | |
304 | + (unsigned long long)max_block); | |
305 | + if (blkoff > max_block) | |
306 | + ret = -ENOSPC; | |
307 | + } | |
308 | + } | |
309 | ||
310 | return ret; | |
311 | } | |
312 | @@ -1140,7 +1193,7 @@ static int ocfs2_search_one_group(struct | |
313 | } | |
314 | ||
315 | ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, | |
316 | - bit_off, &found); | |
317 | + ac->ac_max_block, bit_off, &found); | |
318 | if (ret < 0) { | |
319 | if (ret != -ENOSPC) | |
320 | mlog_errno(ret); | |
321 | @@ -1213,7 +1266,8 @@ static int ocfs2_search_chain(struct ocf | |
322 | /* for now, the chain search is a bit simplistic. We just use | |
323 | * the 1st group with any empty bits. */ | |
324 | while ((status = ac->ac_group_search(alloc_inode, group_bh, | |
325 | - bits_wanted, min_bits, bit_off, | |
326 | + bits_wanted, min_bits, | |
327 | + ac->ac_max_block, bit_off, | |
328 | &tmp_bits)) == -ENOSPC) { | |
329 | if (!bg->bg_next_group) | |
330 | break; | |
331 | --- a/fs/ocfs2/suballoc.h | |
332 | +++ b/fs/ocfs2/suballoc.h | |
333 | @@ -28,10 +28,11 @@ | |
334 | ||
335 | typedef int (group_search_t)(struct inode *, | |
336 | struct buffer_head *, | |
337 | - u32, | |
338 | - u32, | |
339 | - u16 *, | |
340 | - u16 *); | |
341 | + u32, /* bits_wanted */ | |
342 | + u32, /* min_bits */ | |
343 | + u64, /* max_block */ | |
344 | + u16 *, /* *bit_off */ | |
345 | + u16 *); /* *bits_found */ | |
346 | ||
347 | struct ocfs2_alloc_context { | |
348 | struct inode *ac_inode; /* which bitmap are we allocating from? */ | |
349 | @@ -51,6 +52,8 @@ struct ocfs2_alloc_context { | |
350 | group_search_t *ac_group_search; | |
351 | ||
352 | u64 ac_last_group; | |
353 | + u64 ac_max_block; /* Highest block number to allocate. 0 is | |
354 | + is the same as ~0 - unlimited */ | |
355 | }; | |
356 | ||
357 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac); |