+++ /dev/null
-From: Joel Becker <joel.becker@oracle.com>
-Subject: ocfs2: Limit inode allocation to 32bits.
-Patch-mainline: 2.6.28?
-References: FATE302877
-
-ocfs2 inode numbers are block numbers. For any filesystem with less
-than 2^32 blocks, this is not a problem. However, when ocfs2 starts
-using JBD2, it will be able to support filesystems with more than 2^32
-blocks. This would result in inode numbers higher than 2^32.
-
-The problem is that stat(2) can't handle those numbers on 32bit
-machines. The simple solution is to have ocfs2 allocate all inodes
-below that boundary.
-
-The suballoc code is changed to honor an optional block limit. Only the
-inode suballocator sets that limit - all other allocations stay unlimited.
-
-The biggest trick is to grow the inode suballocator beneath that limit.
-There's no point in allocating block groups that are above the limit,
-then rejecting their elements later on. We want to prevent the inode
-allocator from ever having block groups above the limit. This involves
-a little gyration with the local alloc code. If the local alloc window
-is above the limit, it signals the caller to try the global bitmap but
-does not disable the local alloc file (which can be used for other
-allocations).
-
-[ Minor cleanup - removed an ML_NOTICE comment. --Mark ]
-
-Signed-off-by: Joel Becker <joel.becker@oracle.com>
-Signed-off-by: Mark Fasheh <mfasheh@suse.com>
----
- fs/ocfs2/localalloc.c | 55 +++++++++++++++++++++++++++++++
- fs/ocfs2/suballoc.c | 86 ++++++++++++++++++++++++++++++++++++++++----------
- fs/ocfs2/suballoc.h | 11 ++++--
- 3 files changed, 132 insertions(+), 20 deletions(-)
-
---- a/fs/ocfs2/localalloc.c
-+++ b/fs/ocfs2/localalloc.c
-@@ -453,6 +453,46 @@ out:
- return status;
- }
-
-+/* Check to see if the local alloc window is within ac->ac_max_block */
-+static int ocfs2_local_alloc_in_range(struct inode *inode,
-+ struct ocfs2_alloc_context *ac,
-+ u32 bits_wanted)
-+{
-+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-+ struct ocfs2_dinode *alloc;
-+ struct ocfs2_local_alloc *la;
-+ int start;
-+ u64 block_off;
-+
-+ if (!ac->ac_max_block)
-+ return 1;
-+
-+ alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
-+ la = OCFS2_LOCAL_ALLOC(alloc);
-+
-+ start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
-+ if (start == -1) {
-+ mlog_errno(-ENOSPC);
-+ return 0;
-+ }
-+
-+ /*
-+ * Converting (bm_off + start + bits_wanted) to blocks gives us
-+ * the blkno just past our actual allocation. This is perfect
-+ * to compare with ac_max_block.
-+ */
-+ block_off = ocfs2_clusters_to_blocks(inode->i_sb,
-+ le32_to_cpu(la->la_bm_off) +
-+ start + bits_wanted);
-+ mlog(0, "Checking %llu against %llu\n",
-+ (unsigned long long)block_off,
-+ (unsigned long long)ac->ac_max_block);
-+ if (block_off > ac->ac_max_block)
-+ return 0;
-+
-+ return 1;
-+}
-+
- /*
- * make sure we've got at least bitswanted contiguous bits in the
- * local alloc. You lose them when you drop i_mutex.
-@@ -524,6 +564,21 @@ int ocfs2_reserve_local_alloc_bits(struc
- }
- }
-
-+ if (ac->ac_max_block)
-+ mlog(0, "Calling in_range for max block %llu\n",
-+ (unsigned long long)ac->ac_max_block);
-+
-+ if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac,
-+ bits_wanted)) {
-+ /*
-+ * The window is outside ac->ac_max_block.
-+ * This errno tells the caller to keep localalloc enabled
-+ * but to get the allocation from the main bitmap.
-+ */
-+ status = -EFBIG;
-+ goto bail;
-+ }
-+
- ac->ac_inode = local_alloc_inode;
- /* We should never use localalloc from another slot */
- ac->ac_alloc_slot = osb->slot_num;
---- a/fs/ocfs2/suballoc.c
-+++ b/fs/ocfs2/suballoc.c
-@@ -62,15 +62,18 @@ static int ocfs2_block_group_fill(handle
- struct ocfs2_chain_list *cl);
- static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
- struct inode *alloc_inode,
-- struct buffer_head *bh);
-+ struct buffer_head *bh,
-+ u64 max_block);
-
- static int ocfs2_cluster_group_search(struct inode *inode,
- struct buffer_head *group_bh,
- u32 bits_wanted, u32 min_bits,
-+ u64 max_block,
- u16 *bit_off, u16 *bits_found);
- static int ocfs2_block_group_search(struct inode *inode,
- struct buffer_head *group_bh,
- u32 bits_wanted, u32 min_bits,
-+ u64 max_block,
- u16 *bit_off, u16 *bits_found);
- static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
- struct ocfs2_alloc_context *ac,
-@@ -110,6 +113,9 @@ static inline void ocfs2_block_to_cluste
- u64 data_blkno,
- u64 *bg_blkno,
- u16 *bg_bit_off);
-+static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
-+ u32 bits_wanted, u64 max_block,
-+ struct ocfs2_alloc_context **ac);
-
- static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
- {
-@@ -276,7 +282,8 @@ static inline u16 ocfs2_find_smallest_ch
- */
- static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
- struct inode *alloc_inode,
-- struct buffer_head *bh)
-+ struct buffer_head *bh,
-+ u64 max_block)
- {
- int status, credits;
- struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
-@@ -294,9 +301,9 @@ static int ocfs2_block_group_alloc(struc
- mlog_entry_void();
-
- cl = &fe->id2.i_chain;
-- status = ocfs2_reserve_clusters(osb,
-- le16_to_cpu(cl->cl_cpg),
-- &ac);
-+ status = ocfs2_reserve_clusters_with_limit(osb,
-+ le16_to_cpu(cl->cl_cpg),
-+ max_block, &ac);
- if (status < 0) {
- if (status != -ENOSPC)
- mlog_errno(status);
-@@ -469,7 +476,8 @@ static int ocfs2_reserve_suballoc_bits(s
- goto bail;
- }
-
-- status = ocfs2_block_group_alloc(osb, alloc_inode, bh);
-+ status = ocfs2_block_group_alloc(osb, alloc_inode, bh,
-+ ac->ac_max_block);
- if (status < 0) {
- if (status != -ENOSPC)
- mlog_errno(status);
-@@ -591,6 +599,13 @@ int ocfs2_reserve_new_inode(struct ocfs2
- (*ac)->ac_group_search = ocfs2_block_group_search;
-
- /*
-+ * stat(2) can't handle i_ino > 32bits, so we tell the
-+ * lower levels not to allocate us a block group past that
-+ * limit.
-+ */
-+ (*ac)->ac_max_block = (u32)~0U;
-+
-+ /*
- * slot is set when we successfully steal inode from other nodes.
- * It is reset in 3 places:
- * 1. when we flush the truncate log
-@@ -670,9 +685,9 @@ bail:
- /* Callers don't need to care which bitmap (local alloc or main) to
- * use so we figure it out for them, but unfortunately this clutters
- * things a bit. */
--int ocfs2_reserve_clusters(struct ocfs2_super *osb,
-- u32 bits_wanted,
-- struct ocfs2_alloc_context **ac)
-+static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
-+ u32 bits_wanted, u64 max_block,
-+ struct ocfs2_alloc_context **ac)
- {
- int status;
-
-@@ -686,16 +701,14 @@ int ocfs2_reserve_clusters(struct ocfs2_
- }
-
- (*ac)->ac_bits_wanted = bits_wanted;
-+ (*ac)->ac_max_block = max_block;
-
- status = -ENOSPC;
- if (ocfs2_alloc_should_use_local(osb, bits_wanted)) {
- status = ocfs2_reserve_local_alloc_bits(osb,
- bits_wanted,
- *ac);
-- if ((status < 0) && (status != -ENOSPC)) {
-- mlog_errno(status);
-- goto bail;
-- } else if (status == -ENOSPC) {
-+ if (status == -ENOSPC) {
- /* reserve_local_bits will return enospc with
- * the local alloc inode still locked, so we
- * can change this safely here. */
-@@ -704,6 +717,14 @@ int ocfs2_reserve_clusters(struct ocfs2_
- * can clean up what's left of the local
- * allocation */
- osb->local_alloc_state = OCFS2_LA_DISABLED;
-+ } else if (status == -EFBIG) {
-+ /* The local alloc window is outside ac_max_block.
-+ * Use the main bitmap, but don't disable
-+ * local alloc. */
-+ status = -ENOSPC;
-+ } else if (status < 0) {
-+ mlog_errno(status);
-+ goto bail;
- }
- }
-
-@@ -727,6 +748,13 @@ bail:
- return status;
- }
-
-+int ocfs2_reserve_clusters(struct ocfs2_super *osb,
-+ u32 bits_wanted,
-+ struct ocfs2_alloc_context **ac)
-+{
-+ return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0, ac);
-+}
-+
- /*
- * More or less lifted from ext3. I'll leave their description below:
- *
-@@ -1009,10 +1037,12 @@ static inline int ocfs2_block_group_reas
- static int ocfs2_cluster_group_search(struct inode *inode,
- struct buffer_head *group_bh,
- u32 bits_wanted, u32 min_bits,
-+ u64 max_block,
- u16 *bit_off, u16 *bits_found)
- {
- int search = -ENOSPC;
- int ret;
-+ u64 blkoff;
- struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
- u16 tmp_off, tmp_found;
- unsigned int max_bits, gd_cluster_off;
-@@ -1046,6 +1076,17 @@ static int ocfs2_cluster_group_search(st
- if (ret)
- return ret;
-
-+ if (max_block) {
-+ blkoff = ocfs2_clusters_to_blocks(inode->i_sb,
-+ gd_cluster_off +
-+ tmp_off + tmp_found);
-+ mlog(0, "Checking %llu against %llu\n",
-+ (unsigned long long)blkoff,
-+ (unsigned long long)max_block);
-+ if (blkoff > max_block)
-+ return -ENOSPC;
-+ }
-+
- /* ocfs2_block_group_find_clear_bits() might
- * return success, but we still want to return
- * -ENOSPC unless it found the minimum number
-@@ -1063,19 +1104,31 @@ static int ocfs2_cluster_group_search(st
- static int ocfs2_block_group_search(struct inode *inode,
- struct buffer_head *group_bh,
- u32 bits_wanted, u32 min_bits,
-+ u64 max_block,
- u16 *bit_off, u16 *bits_found)
- {
- int ret = -ENOSPC;
-+ u64 blkoff;
- struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data;
-
- BUG_ON(min_bits != 1);
- BUG_ON(ocfs2_is_cluster_bitmap(inode));
-
-- if (bg->bg_free_bits_count)
-+ if (bg->bg_free_bits_count) {
- ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
- group_bh, bits_wanted,
- le16_to_cpu(bg->bg_bits),
- bit_off, bits_found);
-+ if (!ret && max_block) {
-+ blkoff = le64_to_cpu(bg->bg_blkno) + *bit_off +
-+ *bits_found;
-+ mlog(0, "Checking %llu against %llu\n",
-+ (unsigned long long)blkoff,
-+ (unsigned long long)max_block);
-+ if (blkoff > max_block)
-+ ret = -ENOSPC;
-+ }
-+ }
-
- return ret;
- }
-@@ -1140,7 +1193,7 @@ static int ocfs2_search_one_group(struct
- }
-
- ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
-- bit_off, &found);
-+ ac->ac_max_block, bit_off, &found);
- if (ret < 0) {
- if (ret != -ENOSPC)
- mlog_errno(ret);
-@@ -1213,7 +1266,8 @@ static int ocfs2_search_chain(struct ocf
- /* for now, the chain search is a bit simplistic. We just use
- * the 1st group with any empty bits. */
- while ((status = ac->ac_group_search(alloc_inode, group_bh,
-- bits_wanted, min_bits, bit_off,
-+ bits_wanted, min_bits,
-+ ac->ac_max_block, bit_off,
- &tmp_bits)) == -ENOSPC) {
- if (!bg->bg_next_group)
- break;
---- a/fs/ocfs2/suballoc.h
-+++ b/fs/ocfs2/suballoc.h
-@@ -28,10 +28,11 @@
-
- typedef int (group_search_t)(struct inode *,
- struct buffer_head *,
-- u32,
-- u32,
-- u16 *,
-- u16 *);
-+ u32, /* bits_wanted */
-+ u32, /* min_bits */
-+ u64, /* max_block */
-+ u16 *, /* *bit_off */
-+ u16 *); /* *bits_found */
-
- struct ocfs2_alloc_context {
- struct inode *ac_inode; /* which bitmap are we allocating from? */
-@@ -51,6 +52,8 @@ struct ocfs2_alloc_context {
- group_search_t *ac_group_search;
-
- u64 ac_last_group;
-+ u64 ac_max_block; /* Highest block number to allocate. 0 is
-+ the same as ~0 - unlimited */
- };
-
- void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac);