]> git.ipfire.org Git - thirdparty/e2fsprogs.git/commitdiff
New bitmap and inode table allocation for FLEX_BG e2fsprogs-interim
authorJose R. Santos <jrs@us.ibm.com>
Fri, 29 Feb 2008 14:39:03 +0000 (09:39 -0500)
committerTheodore Ts'o <tytso@mit.edu>
Fri, 29 Feb 2008 14:39:30 +0000 (09:39 -0500)
Change the way we allocate bitmaps and inode tables if the FLEX_BG
feature is used at mke2fs time.  It calculates a new offset for
bitmaps and inode tables based on the number of groups that the user
wishes to pack together using the new "-G" option.  Creating a
filesystem with 32 block groups in a flex group can be done by:

mke2fs -j -I 256 -O flex_bg -G 32 /dev/sdX

Signed-off-by: Jose R. Santos <jrs@us.ibm.com>
Signed-off-by: Valerie Clement <valerie.clement@bull.net>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
lib/ext2fs/alloc_tables.c
lib/ext2fs/closefs.c
lib/ext2fs/ext2_fs.h
lib/ext2fs/initialize.c
misc/mke2fs.8.in
misc/mke2fs.c

index 290e54b25fea48512a50f2816bebc3103d7b6346..bee02f31a38443ebadd485c6dd84f0574f810a1f 100644 (file)
 #include "ext2_fs.h"
 #include "ext2fs.h"
 
+/*
+ * Flag the block group that contains 'block' as holding FLEX_BG
+ * meta-data by setting EXT2_BG_FLEX_METADATA in its descriptor.
+ */
+void ext2fs_bgd_set_flex_meta_flag(ext2_filsys fs, blk_t block)
+{
+       dgrp_t  grp = ext2fs_group_of_blk(fs, block);
+
+       /* OR-ing in a bit that is already set leaves bg_flags unchanged */
+       fs->group_desc[grp].bg_flags |= EXT2_BG_FLEX_METADATA;
+}
+
+/*
+ * This routine searches for free blocks that can hold a full
+ * group of bitmaps or inode tables for a flexbg group.  Returns the
+ * block number with a correct offset where the bitmaps and inode
+ * tables can be allocated contiguously and in order.
+ *
+ *   fs        - filesystem; s_log_groups_per_flex gives the flexbg size
+ *   group     - block group whose meta-data is being placed
+ *   start_blk - location of the same kind of meta-data in the
+ *               previous group, or 0 if there is none
+ *   bmap      - block bitmap that is searched for free blocks
+ *   offset    - added to the first free block of the flexbg group to
+ *               get the block at which the full search starts
+ *   size      - number of free blocks wanted; capped at one eighth
+ *               of s_blocks_per_group
+ *
+ * The value returned is whatever the searches left in first_free,
+ * which starts out as 0.
+ */
+blk_t ext2fs_flexbg_offset(ext2_filsys fs, dgrp_t group, blk_t start_blk,
+                          ext2fs_block_bitmap bmap, int offset, int size)
+{
+       int             flexbg, flexbg_size, elem_size;
+       blk_t           last_blk, first_free = 0;
+       dgrp_t          last_grp;
+
+       flexbg_size = 1 << fs->super->s_log_groups_per_flex;
+       flexbg = group / flexbg_size;
+
+       if (size > fs->super->s_blocks_per_group / 8)
+               size = fs->super->s_blocks_per_group / 8;
+
+       /*
+        * Don't do a long search if the previous block
+        * search is still valid.
+        */
+       if (start_blk && group % flexbg_size) {
+               /*
+                * A request larger than the flexbg size is stepped by
+                * a whole inode table, anything else by one block.
+                */
+               if (size > flexbg_size)
+                       elem_size = fs->inode_blocks_per_group;
+               else
+                       elem_size = 1;
+               if (ext2fs_test_block_bitmap_range(bmap, start_blk + elem_size,
+                                                  size))
+                       return start_blk + elem_size;
+       }
+
+       start_blk = ext2fs_group_first_block(fs, flexbg_size * flexbg);
+       last_grp = group | (flexbg_size - 1);
+       /*
+        * Groups are numbered 0 .. group_desc_count - 1, so the last
+        * flexbg group must be clamped to the last existing group.
+        */
+       if (last_grp > fs->group_desc_count - 1)
+               last_grp = fs->group_desc_count - 1;
+       last_blk = ext2fs_group_last_block(fs, last_grp);
+
+       /* Find the first available block */
+       if (ext2fs_get_free_blocks(fs, start_blk, last_blk, 1, bmap,
+                                  &first_free))
+               return first_free;
+
+       /*
+        * The return code is not needed here: first_free is handed
+        * back whether or not a large enough range was found.
+        */
+       ext2fs_get_free_blocks(fs, first_free + offset, last_blk, size,
+                              bmap, &first_free);
+
+       return first_free;
+}
+
 errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
                                      ext2fs_block_bitmap bmap)
 {
        errcode_t       retval;
        blk_t           group_blk, start_blk, last_blk, new_blk, blk;
-       int             j;
+       dgrp_t          last_grp;
+       int             j, rem_grps, flexbg_size = 0;
 
        group_blk = ext2fs_group_first_block(fs, group);
        last_blk = ext2fs_group_last_block(fs, group);
 
        if (!bmap)
                bmap = fs->block_map;
+
+       if (EXT2_HAS_INCOMPAT_FEATURE(fs->super,
+                                      EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
+               flexbg_size = 1 << fs->super->s_log_groups_per_flex;
+               last_grp = group | (flexbg_size - 1);
+               rem_grps = last_grp - group;
+               if (last_grp > fs->group_desc_count)
+                       last_grp = fs->group_desc_count;
+       }
        
        /*
         * Allocate the block and inode bitmaps, if necessary
@@ -56,6 +126,15 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
        } else
                start_blk = group_blk;
 
+       if (flexbg_size) {
+               int prev_block = 0;
+               if (group && fs->group_desc[group-1].bg_block_bitmap)
+                       prev_block = fs->group_desc[group-1].bg_block_bitmap;
+               start_blk = ext2fs_flexbg_offset(fs, group, prev_block, bmap,
+                                                0, rem_grps);
+               last_blk = ext2fs_group_last_block(fs, last_grp);
+       }
+
        if (!fs->group_desc[group].bg_block_bitmap) {
                retval = ext2fs_get_free_blocks(fs, start_blk, last_blk,
                                                1, bmap, &new_blk);
@@ -66,6 +145,21 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
                        return retval;
                ext2fs_mark_block_bitmap(bmap, new_blk);
                fs->group_desc[group].bg_block_bitmap = new_blk;
+               if (flexbg_size) {
+                       dgrp_t tmp = ext2fs_group_of_blk(fs, new_blk);
+                       ext2fs_bgd_set_flex_meta_flag(fs, new_blk);
+                       fs->group_desc[tmp].bg_free_blocks_count--;
+                       fs->super->s_free_blocks_count--;
+               }
+       }
+
+       if (flexbg_size) {
+               int prev_block = 0;
+               if (group && fs->group_desc[group-1].bg_inode_bitmap)
+                       prev_block = fs->group_desc[group-1].bg_inode_bitmap;
+               start_blk = ext2fs_flexbg_offset(fs, group, prev_block, bmap,
+                                                flexbg_size, rem_grps);
+               last_blk = ext2fs_group_last_block(fs, last_grp);
        }
 
        if (!fs->group_desc[group].bg_inode_bitmap) {
@@ -78,11 +172,28 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
                        return retval;
                ext2fs_mark_block_bitmap(bmap, new_blk);
                fs->group_desc[group].bg_inode_bitmap = new_blk;
+               if (flexbg_size) {
+                       dgrp_t tmp = ext2fs_group_of_blk(fs, new_blk);
+                       ext2fs_bgd_set_flex_meta_flag(fs, new_blk);
+                       fs->group_desc[tmp].bg_free_blocks_count--;
+                       fs->super->s_free_blocks_count--;
+               }
        }
 
        /*
         * Allocate the inode table
         */
+       if (flexbg_size) {
+               int prev_block = 0;
+               if (group && fs->group_desc[group-1].bg_inode_table)
+                       prev_block = fs->group_desc[group-1].bg_inode_table;
+               group_blk = ext2fs_flexbg_offset(fs, group, prev_block, bmap,
+                                                flexbg_size * 2,
+                                                fs->inode_blocks_per_group *
+                                                rem_grps);
+               last_blk = ext2fs_group_last_block(fs, last_grp);
+       }
+
        if (!fs->group_desc[group].bg_inode_table) {
                retval = ext2fs_get_free_blocks(fs, group_blk, last_blk,
                                                fs->inode_blocks_per_group,
@@ -91,8 +202,15 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
                        return retval;
                for (j=0, blk = new_blk;
                     j < fs->inode_blocks_per_group;
-                    j++, blk++)
+                    j++, blk++) {
                        ext2fs_mark_block_bitmap(bmap, blk);
+                       if (flexbg_size) {
+                               dgrp_t tmp = ext2fs_group_of_blk(fs, blk);
+                               ext2fs_bgd_set_flex_meta_flag(fs, blk);
+                               fs->group_desc[tmp].bg_free_blocks_count--;
+                               fs->super->s_free_blocks_count--;
+                       }
+               }
                fs->group_desc[group].bg_inode_table = new_blk;
        }
        fs->group_desc[group].bg_checksum =
index 659ee27f03676ae85d32922e165ca4038b623bf2..86ef29af9d15d6507b3e445e35fd10eb0788d1b9 100644 (file)
@@ -100,8 +100,9 @@ int ext2fs_super_and_bgd_loc(ext2_filsys fs,
                        numblocks--;
                }
        }
-               
-       numblocks -= 2 + fs->inode_blocks_per_group;
+
+       if (!fs->super->s_log_groups_per_flex)
+               numblocks -= 2 + fs->inode_blocks_per_group;
 
        if (ret_super_blk)
                *ret_super_blk = super_blk;
index 412b49b7de41f1533ce8d292bd877de3c40cb8a0..caaeba27d5fb2a91c6752e053bdbdca601c1fc16 100644 (file)
@@ -174,6 +174,7 @@ struct ext4_group_desc
 #define EXT2_BG_INODE_UNINIT   0x0001 /* Inode table/bitmap not initialized */
 #define EXT2_BG_BLOCK_UNINIT   0x0002 /* Block bitmap not initialized */
 #define EXT2_BG_INODE_ZEROED   0x0004 /* On-disk itable initialized to zero */
+#define EXT2_BG_FLEX_METADATA  0x0008 /* FLEX_BG block group contains meta-data */
 
 /*
  * Data structures used by the directory indexing feature
@@ -598,7 +599,10 @@ struct ext2_super_block {
        __u16   s_mmp_update_interval;  /* # seconds to wait in MMP checking */
        __u64   s_mmp_block;            /* Block for multi-mount protection */
        __u32   s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
-       __u32   s_reserved[163];        /* Padding to the end of the block */
+       __u8    s_log_groups_per_flex;  /* FLEX_BG group size */
+       __u8    s_reserved_char_pad;
+       __u16   s_reserved_pad;         /* Padding to next 32bits */
+       __u32   s_reserved[162];        /* Padding to the end of the block */
 };
 
 /*
index 1916655b3f9b6e423ad3b608275861e254acb9b5..c3939e5eaaacf24e65aa5fedfac6f1ac52b91e7d 100644 (file)
@@ -159,6 +159,7 @@ errcode_t ext2fs_initialize(const char *name, int flags,
        set_field(s_raid_stride, 0);            /* default stride size: 0 */
        set_field(s_raid_stripe_width, 0);      /* default stripe width: 0 */
        set_field(s_flags, 0);
+       set_field(s_log_groups_per_flex, 0);
        if (super->s_feature_incompat & ~EXT2_LIB_FEATURE_INCOMPAT_SUPP) {
                retval = EXT2_ET_UNSUPP_FEATURE;
                goto cleanup;
@@ -366,7 +367,10 @@ ipg_retry:
         * group, and fill in the correct group statistics for group.
         * Note that although the block bitmap, inode bitmap, and
         * inode table have not been allocated (and in fact won't be
-        * by this routine), they are accounted for nevertheless.
+        * by this routine), they are accounted for nevertheless.  If
+        * FLEX_BG meta-data grouping is used, only account for the
+        * superblock and group descriptors (the inode tables and
+        * bitmaps will be accounted for when allocated).
         */
        super->s_free_blocks_count = 0;
        for (i = 0; i < fs->group_desc_count; i++) {
index c7db2401daec4ca3301ba3f2e9357bbd6e68ea9e..a6bded4c70adffd156e7312811dc2f49bd330c53 100644 (file)
@@ -26,6 +26,10 @@ mke2fs \- create an ext2/ext3 filesystem
 .I blocks-per-group
 ]
 [
+.B \-G
+.I number-of-groups
+]
+[
 .B \-i
 .I bytes-per-inode
 ]
@@ -232,6 +236,12 @@ option rather than manipulating the number of blocks per group.)
 This option is generally used by developers who
 are developing test cases.  
 .TP
+.BI \-G " number-of-groups"
+Specify the number of block groups that will be packed together to
+create one large virtual block group on an ext4 filesystem.  This
+improves meta-data locality and performance on meta-data heavy
+workloads.  The number of groups must be a power of 2.
+.TP
 .BI \-i " bytes-per-inode"
 Specify the bytes/inode ratio. 
 .B mke2fs
@@ -421,6 +431,11 @@ Use hashed b-trees to speed up lookups in large directories.
 .B filetype
 Store file type information in directory entries.
 .TP
+.B flex_bg
+Allow bitmaps and inode tables for a block group to be placed anywhere
+on the storage media (use with -G option to group meta-data in order
+to create a large virtual block group).
+.TP
 .B has_journal
 Create an ext3 journal (as if using the
 .B \-j
index 0184af735e32778677082191b73efe51000e9672..40dac031da9b81888eff1cfafb0e7195f2af1186 100644 (file)
@@ -96,7 +96,7 @@ static void usage(void)
 {
        fprintf(stderr, _("Usage: %s [-c|-t|-l filename] [-b block-size] "
        "[-f fragment-size]\n\t[-i bytes-per-inode] [-I inode-size] "
-       "[-j] [-J journal-options]\n"
+       "[-j] [-J journal-options] [-G meta group size]\n"
        "\t[-N number-of-inodes] [-m reserved-blocks-percentage] "
        "[-o creator-os]\n\t[-g blocks-per-group] [-L volume-label] "
        "[-M last-mounted-directory]\n\t[-O feature[,...]] "
@@ -476,7 +476,8 @@ static void setup_lazy_bg(ext2_filsys fs)
                         * group because it may need block bitmap padding. */
                        if ((ext2fs_bg_has_super(fs, i) &&
                             sb->s_reserved_gdt_blocks) ||
-                           i == fs->group_desc_count - 1)
+                           i == fs->group_desc_count - 1 ||
+                           (bg->bg_flags & EXT2_BG_FLEX_METADATA))
                                continue;
 
                        blks = ext2fs_super_and_bgd_loc(fs, i, 0, 0, 0, 0);
@@ -962,6 +963,7 @@ static void PRS(int argc, char *argv[])
        int             blocksize = 0;
        int             inode_ratio = 0;
        int             inode_size = 0;
+       unsigned long   flex_bg_size = 0;
        double          reserved_ratio = 5.0;
        int             sector_size = 0;
        int             show_version_only = 0;
@@ -1044,7 +1046,7 @@ static void PRS(int argc, char *argv[])
        }
 
        while ((c = getopt (argc, argv,
-                   "b:cf:g:i:jl:m:no:qr:s:tvE:FI:J:L:M:N:O:R:ST:V")) != EOF) {
+                   "b:cf:g:G:i:jl:m:no:qr:s:tvE:FI:J:L:M:N:O:R:ST:V")) != EOF) {
                switch (c) {
                case 'b':
                        blocksize = strtol(optarg, &tmp, 0);
@@ -1095,6 +1097,20 @@ static void PRS(int argc, char *argv[])
                                exit(1);
                        }
                        break;
+               case 'G':
+                       flex_bg_size = strtoul(optarg, &tmp, 0);
+                       if (*tmp) {
+                               com_err(program_name, 0,
+                                       _("Illegal number for Flex_BG size"));
+                               exit(1);
+                       }
+                       if (flex_bg_size < 2 ||
+                           (flex_bg_size & (flex_bg_size-1)) != 0) {
+                               com_err(program_name, 0,
+                                       _("Flex_BG size must be a power of 2"));
+                               exit(1);
+                       }
+                       break;
                case 'i':
                        inode_ratio = strtoul(optarg, &tmp, 0);
                        if (inode_ratio < EXT2_MIN_BLOCK_SIZE ||
@@ -1490,6 +1506,9 @@ static void PRS(int argc, char *argv[])
                }
        }
 
+       if (flex_bg_size)
+               fs_param.s_log_groups_per_flex = int_log2(flex_bg_size);
+
        if (!force && fs_param.s_blocks_count >= ((unsigned) 1 << 31)) {
                com_err(program_name, 0,
                        _("Filesystem too large.  No more than 2**31-1 blocks\n"