1 From: Joel Becker <joel.becker@oracle.com>
2 Subject: ocfs2: Limit inode allocation to 32bits.
3 Patch-mainline: 2.6.28?
6 ocfs2 inode numbers are block numbers. For any filesystem with less
7 than 2^32 blocks, this is not a problem. However, when ocfs2 starts
8 using JBD2, it will be able to support filesystems with more than 2^32
9 blocks. This would result in inode numbers higher than 2^32.
11 The problem is that stat(2) can't handle those numbers on 32bit
12 machines. The simple solution is to have ocfs2 allocate all inodes below that boundary.
15 The suballoc code is changed to honor an optional block limit. Only the
16 inode suballocator sets that limit - all other allocations stay unlimited.
18 The biggest trick is to grow the inode suballocator beneath that limit.
19 There's no point in allocating block groups that are above the limit,
20 then rejecting their elements later on. We want to prevent the inode
21 allocator from ever having block groups above the limit. This involves
22 a little gyration with the local alloc code. If the local alloc window
23 is above the limit, it signals the caller to try the global bitmap but
24 does not disable the local alloc file (which can be used for other allocations).
27 [ Minor cleanup - removed an ML_NOTICE comment. --Mark ]
29 Signed-off-by: Joel Becker <joel.becker@oracle.com>
30 Signed-off-by: Mark Fasheh <mfasheh@suse.com>
32 fs/ocfs2/localalloc.c | 55 +++++++++++++++++++++++++++++++
33 fs/ocfs2/suballoc.c | 86 ++++++++++++++++++++++++++++++++++++++++----------
34 fs/ocfs2/suballoc.h | 11 ++++--
35 3 files changed, 132 insertions(+), 20 deletions(-)
37 --- a/fs/ocfs2/localalloc.c
38 +++ b/fs/ocfs2/localalloc.c
39 @@ -453,6 +453,46 @@ out:
43 +/* Check to see if the local alloc window is within ac->ac_max_block */
44 +static int ocfs2_local_alloc_in_range(struct inode *inode,
45 + struct ocfs2_alloc_context *ac,
48 + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
49 + struct ocfs2_dinode *alloc;
50 + struct ocfs2_local_alloc *la;
54 + if (!ac->ac_max_block)
57 + alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
58 + la = OCFS2_LOCAL_ALLOC(alloc);
60 + start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
62 + mlog_errno(-ENOSPC);
67 + * Converting (bm_off + start + bits_wanted) to blocks gives us
68 + * the blkno just past our actual allocation. This is perfect
69 + * to compare with ac_max_block.
71 + block_off = ocfs2_clusters_to_blocks(inode->i_sb,
72 + le32_to_cpu(la->la_bm_off) +
73 + start + bits_wanted);
74 + mlog(0, "Checking %llu against %llu\n",
75 + (unsigned long long)block_off,
76 + (unsigned long long)ac->ac_max_block);
77 + if (block_off > ac->ac_max_block)
84 * make sure we've got at least bitswanted contiguous bits in the
85 * local alloc. You lose them when you drop i_mutex.
86 @@ -524,6 +564,21 @@ int ocfs2_reserve_local_alloc_bits(struc
90 + if (ac->ac_max_block)
91 + mlog(0, "Calling in_range for max block %llu\n",
92 + (unsigned long long)ac->ac_max_block);
94 + if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac,
97 + * The window is outside ac->ac_max_block.
98 + * This errno tells the caller to keep localalloc enabled
99 + * but to get the allocation from the main bitmap.
105 ac->ac_inode = local_alloc_inode;
106 /* We should never use localalloc from another slot */
107 ac->ac_alloc_slot = osb->slot_num;
108 --- a/fs/ocfs2/suballoc.c
109 +++ b/fs/ocfs2/suballoc.c
110 @@ -62,15 +62,18 @@ static int ocfs2_block_group_fill(handle
111 struct ocfs2_chain_list *cl);
112 static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
113 struct inode *alloc_inode,
114 - struct buffer_head *bh);
115 + struct buffer_head *bh,
118 static int ocfs2_cluster_group_search(struct inode *inode,
119 struct buffer_head *group_bh,
120 u32 bits_wanted, u32 min_bits,
122 u16 *bit_off, u16 *bits_found);
123 static int ocfs2_block_group_search(struct inode *inode,
124 struct buffer_head *group_bh,
125 u32 bits_wanted, u32 min_bits,
127 u16 *bit_off, u16 *bits_found);
128 static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
129 struct ocfs2_alloc_context *ac,
130 @@ -110,6 +113,9 @@ static inline void ocfs2_block_to_cluste
134 +static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
135 + u32 bits_wanted, u64 max_block,
136 + struct ocfs2_alloc_context **ac);
138 static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
140 @@ -276,7 +282,8 @@ static inline u16 ocfs2_find_smallest_ch
142 static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
143 struct inode *alloc_inode,
144 - struct buffer_head *bh)
145 + struct buffer_head *bh,
149 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
150 @@ -294,9 +301,9 @@ static int ocfs2_block_group_alloc(struc
153 cl = &fe->id2.i_chain;
154 - status = ocfs2_reserve_clusters(osb,
155 - le16_to_cpu(cl->cl_cpg),
157 + status = ocfs2_reserve_clusters_with_limit(osb,
158 + le16_to_cpu(cl->cl_cpg),
161 if (status != -ENOSPC)
163 @@ -469,7 +476,8 @@ static int ocfs2_reserve_suballoc_bits(s
167 - status = ocfs2_block_group_alloc(osb, alloc_inode, bh);
168 + status = ocfs2_block_group_alloc(osb, alloc_inode, bh,
171 if (status != -ENOSPC)
173 @@ -591,6 +599,13 @@ int ocfs2_reserve_new_inode(struct ocfs2
174 (*ac)->ac_group_search = ocfs2_block_group_search;
177 + * stat(2) can't handle i_ino > 32bits, so we tell the
178 + * lower levels not to allocate us a block group past that
181 + (*ac)->ac_max_block = (u32)~0U;
184 * slot is set when we successfully steal inode from other nodes.
185 * It is reset in 3 places:
186 * 1. when we flush the truncate log
187 @@ -670,9 +685,9 @@ bail:
188 /* Callers don't need to care which bitmap (local alloc or main) to
189 * use so we figure it out for them, but unfortunately this clutters
191 -int ocfs2_reserve_clusters(struct ocfs2_super *osb,
193 - struct ocfs2_alloc_context **ac)
194 +static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
195 + u32 bits_wanted, u64 max_block,
196 + struct ocfs2_alloc_context **ac)
200 @@ -686,16 +701,14 @@ int ocfs2_reserve_clusters(struct ocfs2_
203 (*ac)->ac_bits_wanted = bits_wanted;
204 + (*ac)->ac_max_block = max_block;
207 if (ocfs2_alloc_should_use_local(osb, bits_wanted)) {
208 status = ocfs2_reserve_local_alloc_bits(osb,
211 - if ((status < 0) && (status != -ENOSPC)) {
212 - mlog_errno(status);
214 - } else if (status == -ENOSPC) {
215 + if (status == -ENOSPC) {
216 /* reserve_local_bits will return enospc with
217 * the local alloc inode still locked, so we
218 * can change this safely here. */
219 @@ -704,6 +717,14 @@ int ocfs2_reserve_clusters(struct ocfs2_
220 * can clean up what's left of the local
222 osb->local_alloc_state = OCFS2_LA_DISABLED;
223 + } else if (status == -EFBIG) {
224 + /* The local alloc window is outside ac_max_block.
225 + * use the main bitmap, but don't disable
228 + } else if (status < 0) {
229 + mlog_errno(status);
234 @@ -727,6 +748,13 @@ bail:
238 +int ocfs2_reserve_clusters(struct ocfs2_super *osb,
240 + struct ocfs2_alloc_context **ac)
242 + return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0, ac);
246 * More or less lifted from ext3. I'll leave their description below:
248 @@ -1009,10 +1037,12 @@ static inline int ocfs2_block_group_reas
249 static int ocfs2_cluster_group_search(struct inode *inode,
250 struct buffer_head *group_bh,
251 u32 bits_wanted, u32 min_bits,
253 u16 *bit_off, u16 *bits_found)
255 int search = -ENOSPC;
258 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
259 u16 tmp_off, tmp_found;
260 unsigned int max_bits, gd_cluster_off;
261 @@ -1046,6 +1076,17 @@ static int ocfs2_cluster_group_search(st
266 + blkoff = ocfs2_clusters_to_blocks(inode->i_sb,
268 + tmp_off + tmp_found);
269 + mlog(0, "Checking %llu against %llu\n",
270 + (unsigned long long)blkoff,
271 + (unsigned long long)max_block);
272 + if (blkoff > max_block)
276 /* ocfs2_block_group_find_clear_bits() might
277 * return success, but we still want to return
278 * -ENOSPC unless it found the minimum number
279 @@ -1063,19 +1104,31 @@ static int ocfs2_cluster_group_search(st
280 static int ocfs2_block_group_search(struct inode *inode,
281 struct buffer_head *group_bh,
282 u32 bits_wanted, u32 min_bits,
284 u16 *bit_off, u16 *bits_found)
288 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data;
290 BUG_ON(min_bits != 1);
291 BUG_ON(ocfs2_is_cluster_bitmap(inode));
293 - if (bg->bg_free_bits_count)
294 + if (bg->bg_free_bits_count) {
295 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
296 group_bh, bits_wanted,
297 le16_to_cpu(bg->bg_bits),
298 bit_off, bits_found);
299 + if (!ret && max_block) {
300 + blkoff = le64_to_cpu(bg->bg_blkno) + *bit_off +
302 + mlog(0, "Checking %llu against %llu\n",
303 + (unsigned long long)blkoff,
304 + (unsigned long long)max_block);
305 + if (blkoff > max_block)
312 @@ -1140,7 +1193,7 @@ static int ocfs2_search_one_group(struct
315 ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
317 + ac->ac_max_block, bit_off, &found);
321 @@ -1213,7 +1266,8 @@ static int ocfs2_search_chain(struct ocf
322 /* for now, the chain search is a bit simplistic. We just use
323 * the 1st group with any empty bits. */
324 while ((status = ac->ac_group_search(alloc_inode, group_bh,
325 - bits_wanted, min_bits, bit_off,
326 + bits_wanted, min_bits,
327 + ac->ac_max_block, bit_off,
328 &tmp_bits)) == -ENOSPC) {
329 if (!bg->bg_next_group)
331 --- a/fs/ocfs2/suballoc.h
332 +++ b/fs/ocfs2/suballoc.h
335 typedef int (group_search_t)(struct inode *,
336 struct buffer_head *,
341 + u32, /* bits_wanted */
342 + u32, /* min_bits */
343 + u64, /* max_block */
344 + u16 *, /* *bit_off */
345 + u16 *); /* *bits_found */
347 struct ocfs2_alloc_context {
348 struct inode *ac_inode; /* which bitmap are we allocating from? */
349 @@ -51,6 +52,8 @@ struct ocfs2_alloc_context {
350 group_search_t *ac_group_search;
353 + u64 ac_max_block; /* Highest block number to allocate. 0 is
354 + the same as ~0 - unlimited */
357 void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac);