]> git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - super1.c
Improve allocation and use of space for bitmaps in version1 metadata
[thirdparty/mdadm.git] / super1.c
index 467ff68b932f0673a42bb0d471100f0006174e4e..7aa324fc459b06a64c6c0ddf1b87c772e661fbce 100644 (file)
--- a/super1.c
+++ b/super1.c
@@ -134,7 +134,7 @@ static unsigned int calc_sb_1_csum(struct mdp_superblock_1 * sb)
 
        csum = (newcsum & 0xffffffff) + (newcsum >> 32);
        sb->sb_csum = disk_csum;
-       return csum;
+       return __cpu_to_le32(csum);
 }
 
 #ifndef MDASSEMBLE
@@ -147,6 +147,7 @@ static void examine_super1(void *sbv, char *homehost)
        int i;
        char *c;
        int l = homehost ? strlen(homehost) : 0;
+       int layout;
 
        printf("          Magic : %08x\n", __le32_to_cpu(sb->magic));
        printf("        Version : %02d\n", 1);
@@ -175,19 +176,27 @@ static void examine_super1(void *sbv, char *homehost)
                switch(__le32_to_cpu(sb->level)) {
                case 1: ddsks=1;break;
                case 4:
-               case 5: ddsks = sb->raid_disks-1; break;
-               case 6: ddsks = sb->raid_disks-2; break;
-               case 10: ddsks = sb->raid_disks / (sb->layout&255) / ((sb->layout>>8)&255);
+               case 5: ddsks = __le32_to_cpu(sb->raid_disks)-1; break;
+               case 6: ddsks = __le32_to_cpu(sb->raid_disks)-2; break;
+               case 10:
+                       layout = __le32_to_cpu(sb->layout);
+                       ddsks = __le32_to_cpu(sb->raid_disks)
+                                / (layout&255) / ((layout>>8)&255);
                }
                if (ddsks)
-                       printf("     Array Size : %llu%s\n", ddsks*(unsigned long long)sb->size, human_size(ddsks*sb->size<<9));
+                       printf("     Array Size : %llu%s\n",
+                              ddsks*(unsigned long long)__le64_to_cpu(sb->size),
+                              human_size(ddsks*__le64_to_cpu(sb->size)<<9));
                if (sb->size != sb->data_size)
-                       printf("      Used Size : %llu%s\n", (unsigned long long)sb->size, human_size(sb->size<<9));
+                       printf("      Used Size : %llu%s\n",
+                              (unsigned long long)__le64_to_cpu(sb->size),
+                              human_size(__le64_to_cpu(sb->size)<<9));
        }
        if (sb->data_offset)
-               printf("    Data Offset : %llu sectors\n", (unsigned long long)__le64_to_cpu(sb->data_offset));
-       if (sb->super_offset)
-               printf("   Super Offset : %llu sectors\n", (unsigned long long)__le64_to_cpu(sb->super_offset));
+               printf("    Data Offset : %llu sectors\n",
+                      (unsigned long long)__le64_to_cpu(sb->data_offset));
+       printf("   Super Offset : %llu sectors\n",
+              (unsigned long long)__le64_to_cpu(sb->super_offset));
        if (__le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET)
                printf("Recovery Offset : %llu sectors\n", (unsigned long long)__le64_to_cpu(sb->recovery_offset));
        printf("          State : %s\n", (__le64_to_cpu(sb->resync_offset)+1)? "active":"clean");
@@ -277,6 +286,18 @@ static void examine_super1(void *sbv, char *homehost)
        default: break;
        }
        printf("\n");
+       printf("    Array Slot : %d (", __le32_to_cpu(sb->dev_number));
+       for (i= __le32_to_cpu(sb->max_dev); i> 0 ; i--)
+               if (__le16_to_cpu(sb->dev_roles[i-1]) != 0xffff)
+                       break;
+       for (d=0; d < i; d++) {
+               int role = __le16_to_cpu(sb->dev_roles[d]);
+               if (d) printf(", ");
+               if (role == 0xffff) printf("empty");
+               else if(role == 0xfffe) printf("failed");
+               else printf("%d", role);
+       }
+       printf(")\n");
        printf("   Array State : ");
        for (d=0; d<__le32_to_cpu(sb->raid_disks); d++) {
                int cnt = 0;
@@ -311,6 +332,7 @@ static void brief_examine_super1(void *sbv)
        struct mdp_superblock_1 *sb = sbv;
        int i;
        char *nm;
+       char *c=map_num(pers, __le32_to_cpu(sb->level));
 
        nm = strchr(sb->set_name, ':');
        if (nm)
@@ -320,14 +342,12 @@ static void brief_examine_super1(void *sbv)
        else
                nm = "??";
 
-       char *c=map_num(pers, __le32_to_cpu(sb->level));
-
        printf("ARRAY /dev/md/%s level=%s metadata=1 num-devices=%d UUID=",
               nm,
-              c?c:"-unknown-", sb->raid_disks);
+              c?c:"-unknown-", __le32_to_cpu(sb->raid_disks));
        for (i=0; i<16; i++) {
-               printf("%02x", sb->set_uuid[i]);
                if ((i&3)==0 && i != 0) printf(":");
+               printf("%02x", sb->set_uuid[i]);
        }
        if (sb->set_name[0])
                printf(" name=%.32s", sb->set_name);
@@ -466,10 +486,18 @@ static int update_super1(struct mdinfo *info, void *sbv, char *update,
        int rv = 0;
        struct mdp_superblock_1 *sb = sbv;
 
-       if (strcmp(update, "force")==0) {
+       if (strcmp(update, "force-one")==0) {
+               /* Not enough devices for a working array,
+                * so bring this one up-to-date
+                */
                if (sb->events != __cpu_to_le64(info->events))
                        rv = 1;
                sb->events = __cpu_to_le64(info->events);
+       }
+       if (strcmp(update, "force-array")==0) {
+               /* Degraded array and 'force' was requested, so
+                * we may need to mark it 'clean'.
+                */
                switch(__le32_to_cpu(sb->level)) {
                case 5: case 4: case 6:
                        /* need to force clean */
@@ -490,25 +518,13 @@ static int update_super1(struct mdinfo *info, void *sbv, char *update,
                        rv = 1;
                }
        }
-#if 0
-       if (strcmp(update, "newdev") == 0) {
-               int d = info->disk.number;
-               memset(&sb->disks[d], 0, sizeof(sb->disks[d]));
-               sb->disks[d].number = d;
-               sb->disks[d].major = info->disk.major;
-               sb->disks[d].minor = info->disk.minor;
-               sb->disks[d].raid_disk = info->disk.raid_disk;
-               sb->disks[d].state = info->disk.state;
-               sb->this_disk = sb->disks[d];
-       }
-#endif
        if (strcmp(update, "grow") == 0) {
                sb->raid_disks = __cpu_to_le32(info->array.raid_disks);
                /* FIXME */
        }
        if (strcmp(update, "resync") == 0) {
                /* make sure resync happens */
-               sb->resync_offset = ~0ULL;
+               sb->resync_offset = 0ULL;
        }
        if (strcmp(update, "uuid") == 0) {
                memcpy(sb->set_uuid, info->uuid, 16);
@@ -527,7 +543,7 @@ static int update_super1(struct mdinfo *info, void *sbv, char *update,
                        strncpy(info->name, c+1, 31 - (c-sb->set_name));
                else
                        strncpy(info->name, sb->set_name, 32);
-               info->name[33] = 0;
+               info->name[32] = 0;
        }
        if (strcmp(update, "name") == 0) {
                if (info->name[0] == 0)
@@ -549,13 +565,6 @@ static int update_super1(struct mdinfo *info, void *sbv, char *update,
        return rv;
 }
 
-
-static __u64 event_super1(void *sbv)
-{
-       struct mdp_superblock_1 *sb = sbv;
-       return __le64_to_cpu(sb->events);
-}
-
 static int init_super1(struct supertype *st, void **sbp, mdu_array_info_t *info,
                       unsigned long long size, char *name, char *homehost)
 {
@@ -735,8 +744,9 @@ static int write_init_super1(struct supertype *st, void *sbv,
        int fd = open(devname, O_RDWR | O_EXCL);
        int rfd;
        int rv;
+       int bm_space;
 
-       unsigned long size, space;
+       unsigned long space;
        unsigned long long dsize, array_size;
        long long sb_offset;
 
@@ -749,7 +759,7 @@ static int write_init_super1(struct supertype *st, void *sbv,
 
        sb->dev_number = __cpu_to_le32(dinfo->number);
        if (dinfo->state & (1<<MD_DISK_WRITEMOSTLY))
-               sb->devflags |= WriteMostly1;
+               sb->devflags |= __cpu_to_le32(WriteMostly1);
 
        if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
            read(rfd, sb->device_uuid, 16) != 16) {
@@ -768,7 +778,11 @@ static int write_init_super1(struct supertype *st, void *sbv,
                if (memcmp(sb->set_uuid, refsb->set_uuid, 16)==0) {
                        /* same array, so preserve events and dev_number */
                        sb->events = refsb->events;
-                       sb->dev_number = refsb->dev_number;
+                       /* bugs in 2.6.17 and earlier mean the dev_number
+                        * chosen in Manage must be preserved
+                        */
+                       if (get_linux_version() >= 2006018)
+                               sb->dev_number = refsb->dev_number;
                }
                free(refsb);
        }
@@ -777,6 +791,7 @@ static int write_init_super1(struct supertype *st, void *sbv,
        if (ioctl(fd, BLKGETSIZE64, &dsize) != 0)
 #endif
        {
+               unsigned long size;
                if (ioctl(fd, BLKGETSIZE, &size))
                        return 1;
                else
@@ -801,6 +816,14 @@ static int write_init_super1(struct supertype *st, void *sbv,
         * for a bitmap.
         */
        array_size = __le64_to_cpu(sb->size);
+       /* work out how much space we left for a bitmap */
+       if (array_size >= 200*1024*1024*2)
+               bm_space = 128*2;
+       else if (array_size > 8*1024*1024*2)
+               bm_space = 64*2;
+       else
+               bm_space = 0;
+
        switch(st->minor_version) {
        case 0:
                sb_offset = dsize;
@@ -808,19 +831,12 @@ static int write_init_super1(struct supertype *st, void *sbv,
                sb_offset &= ~(4*2-1);
                sb->super_offset = __cpu_to_le64(sb_offset);
                sb->data_offset = __cpu_to_le64(0);
-               if (sb_offset-64*2 >= array_size && array_size > 8*1024*1024*2)
-                       sb->data_size = __cpu_to_le64(sb_offset-64*2);
-               else
-                       sb->data_size = __cpu_to_le64(sb_offset);
+               sb->data_size = __cpu_to_le64(sb_offset - bm_space);
                break;
        case 1:
                sb->super_offset = __cpu_to_le64(0);
-               if (dsize - 64*2 >= array_size && array_size > 8*1024*1024*2)
-                       space = 64*2;
-               else
-                       space = 4*2;
-               sb->data_offset = __cpu_to_le64(space); /* leave space for super and bitmap */
-               sb->data_size = __cpu_to_le64(dsize - space);
+               sb->data_offset = __cpu_to_le64(bm_space + 4*2);
+               sb->data_size = __cpu_to_le64(dsize - bm_space - 4*2);
                break;
        case 2:
                sb_offset = 4*2;
@@ -828,9 +844,9 @@ static int write_init_super1(struct supertype *st, void *sbv,
                        space = 64*2;
                else
                        space = 4*2;
-               sb->super_offset = __cpu_to_le64(sb_offset);
-               sb->data_offset = __cpu_to_le64(sb_offset+space);
-               sb->data_size = __cpu_to_le64(dsize - 4*2 - space);
+               sb->super_offset = __cpu_to_le64(4*2);
+               sb->data_offset = __cpu_to_le64(4*2 + 4*2 + bm_space);
+               sb->data_size = __cpu_to_le64(dsize - 4*2 - 4*2 - bm_space );
                break;
        default:
                return -EINVAL;
@@ -1076,11 +1092,11 @@ static __u64 avail_size1(struct supertype *st, __u64 devsize)
                return 0;
 
        /* if the device is bigger than 8Gig, save 64k for bitmap usage,
-        * if biffer than 200Gig, save 128k
+        * if bigger than 200Gig, save 128k
         */
-       if (devsize > 200*1024*1024*2)
+       if (devsize-64*2 >= 200*1024*1024*2)
                devsize -= 128*2;
-       else if (devsize > 8*1024*1024*2)
+       else if (devsize >= 8*1024*1024*2)
                devsize -= 64*2;
 
        switch(st->minor_version) {
@@ -1099,7 +1115,8 @@ static __u64 avail_size1(struct supertype *st, __u64 devsize)
 
 static int
 add_internal_bitmap1(struct supertype *st, void *sbv,
-                    int chunk, int delay, int write_behind, unsigned long long size,
+                    int *chunkp, int delay, int write_behind,
+                    unsigned long long size,
                     int may_change, int major)
 {
        /*
@@ -1107,22 +1124,82 @@ add_internal_bitmap1(struct supertype *st, void *sbv,
         * must fit after the superblock.
         * If may_change, then this is create, and we can put the bitmap
         * before the superblock if we like, or may move the start.
-        * For now, just squeeze the bitmap into 3k and don't change anything.
+        * If !may_change, the bitmap MUST live at offset of 1K, until
+        * we get a sysfs interface.
         *
         * size is in sectors,  chunk is in bytes !!!
         */
 
        unsigned long long bits;
-       unsigned long long max_bits = (3*512 - sizeof(bitmap_super_t)) * 8;
+       unsigned long long max_bits;
        unsigned long long min_chunk;
+       long offset;
+       int chunk = *chunkp;
+       int room;
        struct mdp_superblock_1 *sb = sbv;
        bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + 1024);
 
-       if (st->minor_version && !may_change &&
-           __le64_to_cpu(sb->data_offset) - __le64_to_cpu(sb->super_offset) < 8)
-               return 0; /* doesn't fit */
+       switch(st->minor_version) {
+       case 0:
+               /* either 3K after the superblock, or some amount of space
+                * before.
+                */
+               if (may_change) {
+                       /* We are creating the array, so we *know* how much room has
+                        * been left.
+                        */
+                       offset = 0;
+                       if (__le64_to_cpu(sb->size) >= 200*1024*1024*2)
+                               room = 128*2;
+                       else if (__le64_to_cpu(sb->size) > 8*1024*1024*2)
+                               room = 64*2;
+                       else {
+                               room = 3*2;
+                               offset = 2;
+                       }
+               } else {
+                       room = __le64_to_cpu(sb->super_offset)
+                               - __le64_to_cpu(sb->data_offset)
+                               - __le64_to_cpu(sb->data_size);
+                       /* remove '1 ||' when we can set offset via sysfs */
+                       if (1 || (room < 3*2 &&
+                                 __le32_to_cpu(sb->max_dev) <= 384)) {
+                               room = 3*2;
+                               offset = 1*2;
+                       } else {
+                               offset = 0; /* means movable offset */
+                       }
+               }
+               break;
+       case 1:
+       case 2: /* between superblock and data */
+               if (may_change) {
+                       offset = 4*2;
+                       if (__le64_to_cpu(sb->size) >= 200*1024*1024*2)
+                               room = 128*2;
+                       else if (__le64_to_cpu(sb->size) > 8*1024*1024*2)
+                               room = 64*2;
+                       else
+                               room = 3*2;
+               } else {
+                       room = __le64_to_cpu(sb->data_offset)
+                               - __le64_to_cpu(sb->super_offset);
+                       if (1 || __le32_to_cpu(sb->max_dev) <= 384) {
+                               room -= 2;
+                               offset = 2;
+                       } else {
+                               room -= 4*2;
+                               offset = 4*2;
+                       }
+               }
+               break;
+       }
 
+       if (chunk == UnSet && room > 128*2)
+               /* Limit to 128K of bitmap when chunk size not requested */
+               room = 128*2;
 
+       max_bits = (room * 512 - sizeof(bitmap_super_t)) * 8;
 
        min_chunk = 4096; /* sub-page chunks don't work yet.. */
        bits = (size*512)/min_chunk +1;
@@ -1137,7 +1214,13 @@ add_internal_bitmap1(struct supertype *st, void *sbv,
        if (chunk == 0) /* rounding problem */
                return 0;
 
-       sb->bitmap_offset = __cpu_to_le32(2);
+       if (offset == 0) {
+               bits = (size*512) / chunk + 1;
+               room = ((bits+7)/8 + sizeof(bitmap_super_t) +511)/512;
+               offset = -room;
+       }
+
+       sb->bitmap_offset = __cpu_to_le32(offset);
 
        sb->feature_map = __cpu_to_le32(__le32_to_cpu(sb->feature_map) | 1);
        memset(bms, 0, sizeof(*bms));
@@ -1149,6 +1232,7 @@ add_internal_bitmap1(struct supertype *st, void *sbv,
        bms->sync_size = __cpu_to_le64(size);
        bms->write_behind = __cpu_to_le32(write_behind);
 
+       *chunkp = chunk;
        return 1;
 }
 
@@ -1218,7 +1302,6 @@ struct superswitch super1 = {
        .uuid_from_super = uuid_from_super1,
        .getinfo_super = getinfo_super1,
        .update_super = update_super1,
-       .event_super = event_super1,
        .init_super = init_super1,
        .add_to_super = add_to_super1,
        .store_super = store_super1,