git.ipfire.org Git - people/pmueller/ipfire-2.x.git/blobdiff - src/patches/suse-2.6.27.31/patches.suse/ocfs2-Limit-inode-allocation-to-32bits.patch
Move xen patchset to new version's subdir.
[people/pmueller/ipfire-2.x.git] / src / patches / suse-2.6.27.31 / patches.suse / ocfs2-Limit-inode-allocation-to-32bits.patch
diff --git a/src/patches/suse-2.6.27.31/patches.suse/ocfs2-Limit-inode-allocation-to-32bits.patch b/src/patches/suse-2.6.27.31/patches.suse/ocfs2-Limit-inode-allocation-to-32bits.patch
new file mode 100644 (file)
index 0000000..775f3b3
--- /dev/null
@@ -0,0 +1,357 @@
+From: Joel Becker <joel.becker@oracle.com>
+Subject: ocfs2: Limit inode allocation to 32bits.
+Patch-mainline: 2.6.28?
+References: FATE302877
+
+ocfs2 inode numbers are block numbers.  For any filesystem with less
+than 2^32 blocks, this is not a problem.  However, when ocfs2 starts
+using JBD2, it will be able to support filesystems with more than 2^32
+blocks.  This would result in inode numbers higher than 2^32.
+
+The problem is that stat(2) can't handle those numbers on 32bit
+machines.  The simple solution is to have ocfs2 allocate all inodes
+below that boundary.
+
+The suballoc code is changed to honor an optional block limit.  Only the
+inode suballocator sets that limit - all other allocations stay unlimited.
+
+The biggest trick is to grow the inode suballocator beneath that limit.
+There's no point in allocating block groups that are above the limit,
+then rejecting their elements later on.  We want to prevent the inode
+allocator from ever having block groups above the limit.  This involves
+a little gyration with the local alloc code.  If the local alloc window
+is above the limit, it signals the caller to try the global bitmap but
+does not disable the local alloc file (which can be used for other
+allocations).
+
+[ Minor cleanup - removed an ML_NOTICE comment. --Mark ]
+
+Signed-off-by: Joel Becker <joel.becker@oracle.com>
+Signed-off-by: Mark Fasheh <mfasheh@suse.com>
+---
+ fs/ocfs2/localalloc.c |   55 +++++++++++++++++++++++++++++++
+ fs/ocfs2/suballoc.c   |   86 ++++++++++++++++++++++++++++++++++++++++----------
+ fs/ocfs2/suballoc.h   |   11 ++++--
+ 3 files changed, 132 insertions(+), 20 deletions(-)
+
+--- a/fs/ocfs2/localalloc.c
++++ b/fs/ocfs2/localalloc.c
+@@ -453,6 +453,46 @@ out:
+       return status;
+ }
++/* Check to see if the local alloc window is within ac->ac_max_block */
++static int ocfs2_local_alloc_in_range(struct inode *inode,
++                                    struct ocfs2_alloc_context *ac,
++                                    u32 bits_wanted)
++{
++      struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
++      struct ocfs2_dinode *alloc;
++      struct ocfs2_local_alloc *la;
++      int start;
++      u64 block_off;
++
++      if (!ac->ac_max_block)
++              return 1;
++
++      alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
++      la = OCFS2_LOCAL_ALLOC(alloc);
++
++      start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
++      if (start == -1) {
++              mlog_errno(-ENOSPC);
++              return 0;
++      }
++
++      /*
++       * Converting (bm_off + start + bits_wanted) to blocks gives us
++       * the blkno just past our actual allocation.  This is perfect
++       * to compare with ac_max_block.
++       */
++      block_off = ocfs2_clusters_to_blocks(inode->i_sb,
++                                           le32_to_cpu(la->la_bm_off) +
++                                           start + bits_wanted);
++      mlog(0, "Checking %llu against %llu\n",
++           (unsigned long long)block_off,
++           (unsigned long long)ac->ac_max_block);
++      if (block_off > ac->ac_max_block)
++              return 0;
++
++      return 1;
++}
++
+ /*
+  * make sure we've got at least bitswanted contiguous bits in the
+  * local alloc. You lose them when you drop i_mutex.
+@@ -524,6 +564,21 @@ int ocfs2_reserve_local_alloc_bits(struc
+               }
+       }
++      if (ac->ac_max_block)
++              mlog(0, "Calling in_range for max block %llu\n",
++                   (unsigned long long)ac->ac_max_block);
++
++      if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac,
++                                      bits_wanted)) {
++              /*
++               * The window is outside ac->ac_max_block.
++               * This errno tells the caller to keep localalloc enabled
++               * but to get the allocation from the main bitmap.
++               */
++              status = -EFBIG;
++              goto bail;
++      }
++
+       ac->ac_inode = local_alloc_inode;
+       /* We should never use localalloc from another slot */
+       ac->ac_alloc_slot = osb->slot_num;
+--- a/fs/ocfs2/suballoc.c
++++ b/fs/ocfs2/suballoc.c
+@@ -62,15 +62,18 @@ static int ocfs2_block_group_fill(handle
+                                 struct ocfs2_chain_list *cl);
+ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
+                                  struct inode *alloc_inode,
+-                                 struct buffer_head *bh);
++                                 struct buffer_head *bh,
++                                 u64 max_block);
+ static int ocfs2_cluster_group_search(struct inode *inode,
+                                     struct buffer_head *group_bh,
+                                     u32 bits_wanted, u32 min_bits,
++                                    u64 max_block,
+                                     u16 *bit_off, u16 *bits_found);
+ static int ocfs2_block_group_search(struct inode *inode,
+                                   struct buffer_head *group_bh,
+                                   u32 bits_wanted, u32 min_bits,
++                                  u64 max_block,
+                                   u16 *bit_off, u16 *bits_found);
+ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
+                                    struct ocfs2_alloc_context *ac,
+@@ -110,6 +113,9 @@ static inline void ocfs2_block_to_cluste
+                                               u64 data_blkno,
+                                               u64 *bg_blkno,
+                                               u16 *bg_bit_off);
++static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
++                                           u32 bits_wanted, u64 max_block,
++                                           struct ocfs2_alloc_context **ac);
+ static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
+ {
+@@ -276,7 +282,8 @@ static inline u16 ocfs2_find_smallest_ch
+  */
+ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
+                                  struct inode *alloc_inode,
+-                                 struct buffer_head *bh)
++                                 struct buffer_head *bh,
++                                 u64 max_block)
+ {
+       int status, credits;
+       struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
+@@ -294,9 +301,9 @@ static int ocfs2_block_group_alloc(struc
+       mlog_entry_void();
+       cl = &fe->id2.i_chain;
+-      status = ocfs2_reserve_clusters(osb,
+-                                      le16_to_cpu(cl->cl_cpg),
+-                                      &ac);
++      status = ocfs2_reserve_clusters_with_limit(osb,
++                                                 le16_to_cpu(cl->cl_cpg),
++                                                 max_block, &ac);
+       if (status < 0) {
+               if (status != -ENOSPC)
+                       mlog_errno(status);
+@@ -469,7 +476,8 @@ static int ocfs2_reserve_suballoc_bits(s
+                       goto bail;
+               }
+-              status = ocfs2_block_group_alloc(osb, alloc_inode, bh);
++              status = ocfs2_block_group_alloc(osb, alloc_inode, bh,
++                                               ac->ac_max_block);
+               if (status < 0) {
+                       if (status != -ENOSPC)
+                               mlog_errno(status);
+@@ -591,6 +599,13 @@ int ocfs2_reserve_new_inode(struct ocfs2
+       (*ac)->ac_group_search = ocfs2_block_group_search;
+       /*
++       * stat(2) can't handle i_ino > 32bits, so we tell the
++       * lower levels not to allocate us a block group past that
++       * limit.
++       */
++      (*ac)->ac_max_block = (u32)~0U;
++
++      /*
+        * slot is set when we successfully steal inode from other nodes.
+        * It is reset in 3 places:
+        * 1. when we flush the truncate log
+@@ -670,9 +685,9 @@ bail:
+ /* Callers don't need to care which bitmap (local alloc or main) to
+  * use so we figure it out for them, but unfortunately this clutters
+  * things a bit. */
+-int ocfs2_reserve_clusters(struct ocfs2_super *osb,
+-                         u32 bits_wanted,
+-                         struct ocfs2_alloc_context **ac)
++static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
++                                           u32 bits_wanted, u64 max_block,
++                                           struct ocfs2_alloc_context **ac)
+ {
+       int status;
+@@ -686,16 +701,14 @@ int ocfs2_reserve_clusters(struct ocfs2_
+       }
+       (*ac)->ac_bits_wanted = bits_wanted;
++      (*ac)->ac_max_block = max_block;
+       status = -ENOSPC;
+       if (ocfs2_alloc_should_use_local(osb, bits_wanted)) {
+               status = ocfs2_reserve_local_alloc_bits(osb,
+                                                       bits_wanted,
+                                                       *ac);
+-              if ((status < 0) && (status != -ENOSPC)) {
+-                      mlog_errno(status);
+-                      goto bail;
+-              } else if (status == -ENOSPC) {
++              if (status == -ENOSPC) {
+                       /* reserve_local_bits will return enospc with
+                        * the local alloc inode still locked, so we
+                        * can change this safely here. */
+@@ -704,6 +717,14 @@ int ocfs2_reserve_clusters(struct ocfs2_
+                        * can clean up what's left of the local
+                        * allocation */
+                       osb->local_alloc_state = OCFS2_LA_DISABLED;
++              } else if (status == -EFBIG) {
++                      /* The local alloc window is outside ac_max_block.
++                       * Use the main bitmap, but don't disable
++                       * local alloc. */
++                      status = -ENOSPC;
++              } else if (status < 0) {
++                      mlog_errno(status);
++                      goto bail;
+               }
+       }
+@@ -727,6 +748,13 @@ bail:
+       return status;
+ }
++int ocfs2_reserve_clusters(struct ocfs2_super *osb,
++                         u32 bits_wanted,
++                         struct ocfs2_alloc_context **ac)
++{
++      return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0, ac);
++}
++
+ /*
+  * More or less lifted from ext3. I'll leave their description below:
+  *
+@@ -1009,10 +1037,12 @@ static inline int ocfs2_block_group_reas
+ static int ocfs2_cluster_group_search(struct inode *inode,
+                                     struct buffer_head *group_bh,
+                                     u32 bits_wanted, u32 min_bits,
++                                    u64 max_block,
+                                     u16 *bit_off, u16 *bits_found)
+ {
+       int search = -ENOSPC;
+       int ret;
++      u64 blkoff;
+       struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
+       u16 tmp_off, tmp_found;
+       unsigned int max_bits, gd_cluster_off;
+@@ -1046,6 +1076,17 @@ static int ocfs2_cluster_group_search(st
+               if (ret)
+                       return ret;
++              if (max_block) {
++                      blkoff = ocfs2_clusters_to_blocks(inode->i_sb,
++                                                        gd_cluster_off +
++                                                        tmp_off + tmp_found);
++                      mlog(0, "Checking %llu against %llu\n",
++                           (unsigned long long)blkoff,
++                           (unsigned long long)max_block);
++                      if (blkoff > max_block)
++                              return -ENOSPC;
++              }
++
+               /* ocfs2_block_group_find_clear_bits() might
+                * return success, but we still want to return
+                * -ENOSPC unless it found the minimum number
+@@ -1063,19 +1104,31 @@ static int ocfs2_cluster_group_search(st
+ static int ocfs2_block_group_search(struct inode *inode,
+                                   struct buffer_head *group_bh,
+                                   u32 bits_wanted, u32 min_bits,
++                                  u64 max_block,
+                                   u16 *bit_off, u16 *bits_found)
+ {
+       int ret = -ENOSPC;
++      u64 blkoff;
+       struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data;
+       BUG_ON(min_bits != 1);
+       BUG_ON(ocfs2_is_cluster_bitmap(inode));
+-      if (bg->bg_free_bits_count)
++      if (bg->bg_free_bits_count) {
+               ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
+                                                       group_bh, bits_wanted,
+                                                       le16_to_cpu(bg->bg_bits),
+                                                       bit_off, bits_found);
++              if (!ret && max_block) {
++                      blkoff = le64_to_cpu(bg->bg_blkno) + *bit_off +
++                              *bits_found;
++                      mlog(0, "Checking %llu against %llu\n",
++                           (unsigned long long)blkoff,
++                           (unsigned long long)max_block);
++                      if (blkoff > max_block)
++                              ret = -ENOSPC;
++              }
++      }
+       return ret;
+ }
+@@ -1140,7 +1193,7 @@ static int ocfs2_search_one_group(struct
+       }
+       ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
+-                                bit_off, &found);
++                                ac->ac_max_block, bit_off, &found);
+       if (ret < 0) {
+               if (ret != -ENOSPC)
+                       mlog_errno(ret);
+@@ -1213,7 +1266,8 @@ static int ocfs2_search_chain(struct ocf
+       /* for now, the chain search is a bit simplistic. We just use
+        * the 1st group with any empty bits. */
+       while ((status = ac->ac_group_search(alloc_inode, group_bh,
+-                                           bits_wanted, min_bits, bit_off,
++                                           bits_wanted, min_bits,
++                                           ac->ac_max_block, bit_off,
+                                            &tmp_bits)) == -ENOSPC) {
+               if (!bg->bg_next_group)
+                       break;
+--- a/fs/ocfs2/suballoc.h
++++ b/fs/ocfs2/suballoc.h
+@@ -28,10 +28,11 @@
+ typedef int (group_search_t)(struct inode *,
+                            struct buffer_head *,
+-                           u32,
+-                           u32,
+-                           u16 *,
+-                           u16 *);
++                           u32,                       /* bits_wanted */
++                           u32,                       /* min_bits */
++                           u64,                       /* max_block */
++                           u16 *,                     /* *bit_off */
++                           u16 *);                    /* *bits_found */
+ struct ocfs2_alloc_context {
+       struct inode *ac_inode;    /* which bitmap are we allocating from? */
+@@ -51,6 +52,8 @@ struct ocfs2_alloc_context {
+       group_search_t *ac_group_search;
+       u64    ac_last_group;
++      u64    ac_max_block;  /* Highest block number to allocate. 0 is
++                               the same as ~0 - unlimited */
+ };
+ void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac);