]> git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - super1.c
Consistently print program Name and __func__ in debug messages.
[thirdparty/mdadm.git] / super1.c
index 9993386383f90c39f259440a5af3e99eb50d9432..0fd84e25bfde2fbfd1f5fb814f0d1e42bd7c57f8 100644 (file)
--- a/super1.c
+++ b/super1.c
@@ -22,6 +22,7 @@
  *    Email: <neilb@suse.de>
  */
 
+#include <stddef.h>
 #include "mdadm.h"
 /*
  * The version-1 superblock :
@@ -57,7 +58,7 @@ struct mdp_superblock_1 {
        __u64   reshape_position;       /* next address in array-space for reshape */
        __u32   delta_disks;    /* change in number of raid_disks               */
        __u32   new_layout;     /* new layout                                   */
-       __u32   new_chunk;      /* new chunk size (bytes)                       */
+       __u32   new_chunk;      /* new chunk size (sectors)                     */
        __u32   new_offset;     /* signed number to add to data_offset in new
                                 * layout.  0 == no-change.  This can be
                                 * different on each device in the array.
@@ -133,9 +134,6 @@ struct misc_dev_info {
                                        |MD_FEATURE_NEW_OFFSET          \
                                        )
 
-#ifndef offsetof
-#define offsetof(t,f) ((size_t)&(((t*)0)->f))
-#endif
 static unsigned int calc_sb_1_csum(struct mdp_superblock_1 * sb)
 {
        unsigned int disk_csum, csum;
@@ -269,6 +267,7 @@ static void examine_super1(struct supertype *st, char *homehost)
        int l = homehost ? strlen(homehost) : 0;
        int layout;
        unsigned long long sb_offset;
+       struct mdinfo info;
 
        printf("          Magic : %08x\n", __le32_to_cpu(sb->magic));
        printf("        Version : 1");
@@ -327,7 +326,8 @@ static void examine_super1(struct supertype *st, char *homehost)
        if (sb->data_offset)
                printf("    Data Offset : %llu sectors\n",
                       (unsigned long long)__le64_to_cpu(sb->data_offset));
-       if (sb->new_offset) {
+       if (sb->new_offset &&
+           (__le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET)) {
                unsigned long long offset = __le64_to_cpu(sb->data_offset);
                offset += (signed)(int32_t)__le32_to_cpu(sb->new_offset);
                printf("     New Offset : %llu sectors\n", offset);
@@ -336,6 +336,13 @@ static void examine_super1(struct supertype *st, char *homehost)
               (unsigned long long)__le64_to_cpu(sb->super_offset));
        if (__le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET)
                printf("Recovery Offset : %llu sectors\n", (unsigned long long)__le64_to_cpu(sb->recovery_offset));
+
+       st->ss->getinfo_super(st, &info, NULL);
+       if (info.space_after != 1 &&
+           !(__le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
+               printf("   Unused Space : before=%llu sectors, after=%llu sectors\n",
+                      info.space_before, info.space_after);
+
        printf("          State : %s\n", (__le64_to_cpu(sb->resync_offset)+1)? "active":"clean");
        printf("    Device UUID : ");
        for (i=0; i<16; i++) {
@@ -395,14 +402,13 @@ static void examine_super1(struct supertype *st, char *homehost)
        if (sb->bblog_size && sb->bblog_offset) {
                printf("  Bad Block Log : %d entries available at offset %ld sectors",
                       __le16_to_cpu(sb->bblog_size)*512/8,
-                      (long)__le32_to_cpu(sb->bblog_offset));
+                      (long)(int32_t)__le32_to_cpu(sb->bblog_offset));
                if (sb->feature_map &
                    __cpu_to_le32(MD_FEATURE_BAD_BLOCKS))
                        printf(" - bad blocks present.");
                printf("\n");
        }
 
-
        if (calc_sb_1_csum(sb) == sb->sb_csum)
                printf("       Checksum : %x - correct\n", __le32_to_cpu(sb->sb_csum));
        else
@@ -461,6 +467,8 @@ static void examine_super1(struct supertype *st, char *homehost)
                role = 0xFFFF;
        if (role >= 0xFFFE)
                printf("spare\n");
+       else if (sb->feature_map & __cpu_to_le32(MD_FEATURE_REPLACEMENT))
+               printf("Replacement device %d\n", role);
        else
                printf("Active device %d\n", role);
 
@@ -473,9 +481,14 @@ static void examine_super1(struct supertype *st, char *homehost)
                        if (role == d)
                                cnt++;
                }
-               if (cnt > 1) printf("?");
-               else if (cnt == 1) printf("A");
-               else printf (".");
+               if (cnt == 2)
+                       printf("R");
+               else if (cnt == 1)
+                       printf("A");
+               else if (cnt == 0)
+                       printf(".");
+               else
+                       printf("?");
        }
 #if 0
        /* This is confusing too */
@@ -487,11 +500,10 @@ static void examine_super1(struct supertype *st, char *homehost)
        }
        if (faulty) printf(" %d failed", faulty);
 #endif
-       printf(" ('A' == active, '.' == missing)");
+       printf(" ('A' == active, '.' == missing, 'R' == replacing)");
        printf("\n");
 }
 
-
 static void brief_examine_super1(struct supertype *st, int verbose)
 {
        struct mdp_superblock_1 *sb = st->sb;
@@ -590,6 +602,143 @@ static void export_examine_super1(struct supertype *st)
               (unsigned long long)__le64_to_cpu(sb->events));
 }
 
+static int copy_metadata1(struct supertype *st, int from, int to)
+{
+       /* Read superblock.  If it looks good, write it out.
+        * Then if a bitmap is present, copy that.
+        * And if a bad-block-list is present, copy that too.
+        */
+       void *buf;
+       unsigned long long dsize, sb_offset;
+       const int bufsize = 4*1024;
+       struct mdp_superblock_1 super, *sb;
+
+       if (posix_memalign(&buf, 4096, bufsize) != 0)
+               return 1;
+
+       if (!get_dev_size(from, NULL, &dsize))
+               goto err;
+
+       dsize >>= 9;
+       if (dsize < 24)
+               goto err;
+       switch(st->minor_version) {
+       case 0:
+               sb_offset = dsize;
+               sb_offset -= 8*2;
+               sb_offset &= ~(4*2-1);
+               break;
+       case 1:
+               sb_offset = 0;
+               break;
+       case 2:
+               sb_offset = 4*2;
+               break;
+       default:
+               goto err;
+       }
+
+       if (lseek64(from, sb_offset << 9, 0) < 0LL)
+               goto err;
+       if (read(from, buf, bufsize) != bufsize)
+               goto err;
+
+       sb = buf;
+       super = *sb; // save most of sb for when we reuse buf
+
+       if (__le32_to_cpu(super.magic) != MD_SB_MAGIC ||
+           __le32_to_cpu(super.major_version) != 1 ||
+           __le64_to_cpu(super.super_offset) != sb_offset ||
+           calc_sb_1_csum(sb) != super.sb_csum)
+               goto err;
+
+       if (lseek64(to, sb_offset << 9, 0) < 0LL)
+               goto err;
+       if (write(to, buf, bufsize) != bufsize)
+               goto err;
+
+       if (super.feature_map & __le32_to_cpu(MD_FEATURE_BITMAP_OFFSET)) {
+               unsigned long long bitmap_offset = sb_offset;
+               int bytes = 4096; // just an estimate.
+               int written = 0;
+               struct align_fd afrom, ato;
+
+               init_afd(&afrom, from);
+               init_afd(&ato, to);
+
+               bitmap_offset += (int32_t)__le32_to_cpu(super.bitmap_offset);
+
+               if (lseek64(from, bitmap_offset<<9, 0) < 0)
+                       goto err;
+               if (lseek64(to, bitmap_offset<<9, 0) < 0)
+                       goto err;
+
+               for (written = 0; written < bytes ; ) {
+                       int n = bytes - written;
+                       if (n > 4096)
+                               n = 4096;
+                       if (aread(&afrom, buf, n) != n)
+                               goto err;
+                       if (written == 0) {
+                               /* have the header, can calculate
+                                * correct bitmap bytes */
+                               bitmap_super_t *bms;
+                               int bits;
+                               bms = (void*)buf;
+                               bits = __le64_to_cpu(bms->sync_size) / (__le32_to_cpu(bms->chunksize)>>9);
+                               bytes = (bits+7) >> 3;
+                               bytes += sizeof(bitmap_super_t);
+                               bytes = ROUND_UP(bytes, 512);
+                               if (n > bytes)
+                                       n =  bytes;
+                       }
+                       if (awrite(&ato, buf, n) != n)
+                               goto err;
+                       written += n;
+               }
+       }
+
+       if (super.bblog_size != 0 &&
+           __le32_to_cpu(super.bblog_size) <= 100 &&
+           super.bblog_offset != 0 &&
+           (super.feature_map & __le32_to_cpu(MD_FEATURE_BAD_BLOCKS))) {
+               /* There is a bad block log */
+               unsigned long long bb_offset = sb_offset;
+               int bytes = __le32_to_cpu(super.bblog_size) * 512;
+               int written = 0;
+               struct align_fd afrom, ato;
+
+               init_afd(&afrom, from);
+               init_afd(&ato, to);
+
+               bb_offset += (int32_t)__le32_to_cpu(super.bblog_offset);
+
+               if (lseek64(from, bb_offset<<9, 0) < 0)
+                       goto err;
+               if (lseek64(to, bb_offset<<9, 0) < 0)
+                       goto err;
+
+               for (written = 0; written < bytes ; ) {
+                       int n = bytes - written;
+                       if (n > 4096)
+                               n = 4096;
+                       if (aread(&afrom, buf, n) != n)
+                               goto err;
+
+                       if (awrite(&ato, buf, n) != n)
+                               goto err;
+                       written += n;
+               }
+       }
+
+       free(buf);
+       return 0;
+
+err:
+       free(buf);
+       return 1;
+}
+
 static void detail_super1(struct supertype *st, char *homehost)
 {
        struct mdp_superblock_1 *sb = st->sb;
@@ -641,6 +790,62 @@ static void export_detail_super1(struct supertype *st)
                printf("MD_NAME=%.*s\n", len, sb->set_name);
 }
 
+static int examine_badblocks_super1(struct supertype *st, int fd, char *devname)
+{
+       struct mdp_superblock_1 *sb = st->sb;
+       unsigned long long offset;
+       int size;
+       __u64 *bbl, *bbp;
+       int i;
+
+       if  (!sb->bblog_size || __le32_to_cpu(sb->bblog_size) > 100
+            || !sb->bblog_offset){
+               printf("No bad-blocks list configured on %s\n", devname);
+               return 0;
+       }
+       if ((sb->feature_map & __cpu_to_le32(MD_FEATURE_BAD_BLOCKS))
+           == 0) {
+               printf("Bad-blocks list is empty in %s\n", devname);
+               return 0;
+       }
+
+       size = __le32_to_cpu(sb->bblog_size)* 512;
+       if (posix_memalign((void**)&bbl, 4096, size) != 0) {
+               pr_err("could not allocate badblocks list\n");
+               return 0;
+       }
+       offset = __le64_to_cpu(sb->super_offset) +
+               (int)__le32_to_cpu(sb->bblog_offset);
+       offset <<= 9;
+       if (lseek64(fd, offset, 0) < 0) {
+               pr_err("Cannot seek to bad-blocks list\n");
+               return 1;
+       }
+       if (read(fd, bbl, size) != size) {
+               pr_err("Cannot read bad-blocks list\n");
+               return 1;
+       }
+       /* 64bits per entry. 10 bits is block-count, 54 bits is block
+        * offset.  Blocks are sectors unless bblog->shift makes them bigger
+        */
+       bbp = (__u64*)bbl;
+       printf("Bad-blocks on %s:\n", devname);
+       for (i = 0; i < size/8; i++, bbp++) {
+               __u64 bb = __le64_to_cpu(*bbp);
+               int count = bb & 0x3ff;
+               unsigned long long sector = bb >> 10;
+
+               if (bb + 1 == 0)
+                       break;
+
+               sector <<= sb->bblog_shift;
+               count <<= sb->bblog_shift;
+
+               printf("%20llu for %d sectors\n", sector, count);
+       }
+       return 0;
+}
+
 #endif
 
 static int match_home1(struct supertype *st, char *homehost)
@@ -704,20 +909,32 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
                role = __le16_to_cpu(sb->dev_roles[__le32_to_cpu(sb->dev_number)]);
 
        super_offset = __le64_to_cpu(sb->super_offset);
-       data_size = __le64_to_cpu(sb->size);
+       if (info->array.level <= 0)
+               data_size = __le64_to_cpu(sb->data_size);
+       else
+               data_size = __le64_to_cpu(sb->size);
        if (info->data_offset < super_offset) {
                unsigned long long end;
                info->space_before = info->data_offset;
                end = super_offset;
-               if (info->bitmap_offset < 0)
-                       end += info->bitmap_offset;
+
+               if (sb->bblog_offset && sb->bblog_size) {
+                       unsigned long long bboffset = super_offset;
+                       bboffset += (int32_t)__le32_to_cpu(sb->bblog_offset);
+                       if (bboffset < end)
+                               end = bboffset;
+               }
+
+               if (super_offset + info->bitmap_offset < end)
+                       end = super_offset + info->bitmap_offset;
+
                if (info->data_offset + data_size < end)
                        info->space_after = end - data_size - info->data_offset;
                else
                        info->space_after = 0;
        } else {
-               info->space_before = (info->data_offset -
-                                     super_offset);
+               unsigned long long earliest;
+               earliest = super_offset + (32+4)*2; /* match kernel */
                if (info->bitmap_offset > 0) {
                        unsigned long long bmend = info->bitmap_offset;
                        unsigned long long size = __le64_to_cpu(bsb->sync_size);
@@ -726,15 +943,31 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
                        size += sizeof(bitmap_super_t);
                        size = ROUND_UP(size, 4096);
                        size /= 512;
-                       size += bmend;
-                       if (size < info->space_before)
-                               info->space_before -= size;
-                       else
-                               info->space_before = 0;
-               } else
-                       info->space_before -= 8; /* superblock */
+                       bmend += size;
+                       if (bmend > earliest)
+                               bmend = earliest;
+               }
+               if (sb->bblog_offset && sb->bblog_size) {
+                       unsigned long long bbend = super_offset;
+                       bbend += (int32_t)__le32_to_cpu(sb->bblog_offset);
+                       bbend += __le32_to_cpu(sb->bblog_size);
+                       if (bbend > earliest)
+                               earliest = bbend;
+               }
+               if (earliest < info->data_offset)
+                       info->space_before = info->data_offset - earliest;
+               else
+                       info->space_before = 0;
                info->space_after = misc->device_size - data_size - info->data_offset;
        }
+       if (info->space_before == 0 && info->space_after == 0) {
+               /* It will look like we don't support data_offset changes,
+                * be we do - it's just that there is no room.
+                * A change that reduced the number of devices should
+                * still be allowed, so set the otherwise useless value of '1'
+                */
+               info->space_after = 1;
+       }
 
        info->disk.raid_disk = -1;
        switch(role) {
@@ -759,6 +992,11 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
        strncpy(info->name, sb->set_name, 32);
        info->name[32] = 0;
 
+       if ((__le32_to_cpu(sb->feature_map)&MD_FEATURE_REPLACEMENT)) {
+               info->disk.state &= ~(1 << MD_DISK_SYNC);
+               info->disk.state |=  1 << MD_DISK_REPLACEMENT;
+       }
+
        if (sb->feature_map & __le32_to_cpu(MD_FEATURE_RECOVERY_OFFSET))
                info->recovery_start = __le32_to_cpu(sb->recovery_offset);
        else
@@ -766,7 +1004,8 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
 
        if (sb->feature_map & __le32_to_cpu(MD_FEATURE_RESHAPE_ACTIVE)) {
                info->reshape_active = 1;
-               if (info->array.level == 10)
+               if ((sb->feature_map & __le32_to_cpu(MD_FEATURE_NEW_OFFSET)) &&
+                   sb->new_offset != 0)
                        info->reshape_active |= RESHAPE_NO_BACKUP;
                info->reshape_progress = __le64_to_cpu(sb->reshape_position);
                info->new_level = __le32_to_cpu(sb->new_level);
@@ -819,6 +1058,21 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
        int rv = 0;
        struct mdp_superblock_1 *sb = st->sb;
 
+       if (strcmp(update, "homehost") == 0 &&
+           homehost) {
+               /* Note that 'homehost' is special as it is really
+                * a "name" update.
+                */
+               char *c;
+               update = "name";
+               c = strchr(sb->set_name, ':');
+               if (c)
+                       strncpy(info->name, c+1, 31 - (c-sb->set_name));
+               else
+                       strncpy(info->name, sb->set_name, 32);
+               info->name[32] = 0;
+       }
+
        if (strcmp(update, "force-one")==0) {
                /* Not enough devices for a working array,
                 * so bring this one up-to-date
@@ -840,7 +1094,7 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
        } else if (strcmp(update, "assemble")==0) {
                int d = info->disk.number;
                int want;
-               if (info->disk.state == 6)
+               if (info->disk.state & (1<<MD_DISK_ACTIVE))
                        want = info->disk.raid_disk;
                else
                        want = 0xFFFF;
@@ -927,27 +1181,29 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
                 */
                unsigned long long sb_offset = __le64_to_cpu(sb->super_offset);
                unsigned long long data_offset = __le64_to_cpu(sb->data_offset);
-               long bitmap_offset = (long)__le64_to_cpu(sb->bitmap_offset);
+               long bitmap_offset = (long)(int32_t)__le32_to_cpu(sb->bitmap_offset);
                long bm_sectors = 0;
                long space;
 
+#ifndef MDASSEMBLE
                if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) {
                        struct bitmap_super_s *bsb;
                        bsb = (struct bitmap_super_s *)(((char*)sb)+MAX_SB_SIZE);
                        bm_sectors = bitmap_sectors(bsb);
                }
-
+#endif
                if (sb_offset < data_offset) {
-                       /* 1.1 or 1.2.  Put bbl just before data
+                       /* 1.1 or 1.2.  Put bbl after bitmap leaving at least 32K
                         */
                        long bb_offset;
-                       space = data_offset - sb_offset;
-                       bb_offset = space - 8;
+                       bb_offset = sb_offset + 8;
                        if (bm_sectors && bitmap_offset > 0)
-                               space -= (bitmap_offset + bm_sectors);
-                       else
-                               space -= 8; /* The superblock */
-                       if (space >= 8) {
+                               bb_offset = bitmap_offset + bm_sectors;
+                       while (bb_offset < (long)sb_offset + 8 + 32*2
+                              && bb_offset + 8+8 <= (long)data_offset)
+                               /* too close to bitmap, and room to grow */
+                               bb_offset += 8;
+                       if (bb_offset + 8 <= (long)data_offset) {
                                sb->bblog_size = __cpu_to_le16(8);
                                sb->bblog_offset = __cpu_to_le32(bb_offset);
                        }
@@ -971,16 +1227,6 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
                        sb->bblog_shift = 0;
                        sb->bblog_offset = 0;
                }
-       } else if (strcmp(update, "homehost") == 0 &&
-                  homehost) {
-               char *c;
-               update = "name";
-               c = strchr(sb->set_name, ':');
-               if (c)
-                       strncpy(info->name, c+1, 31 - (c-sb->set_name));
-               else
-                       strncpy(info->name, sb->set_name, 32);
-               info->name[32] = 0;
        } else if (strcmp(update, "name") == 0) {
                if (info->name[0] == 0)
                        sprintf(info->name, "%d", info->array.md_minor);
@@ -999,12 +1245,65 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
                /* set data_size to device size less data_offset */
                struct misc_dev_info *misc = (struct misc_dev_info*)
                        (st->sb + MAX_SB_SIZE + BM_SUPER_SIZE);
-               printf("Size was %llu\n", (unsigned long long)
-                      __le64_to_cpu(sb->data_size));
                sb->data_size = __cpu_to_le64(
                        misc->device_size - __le64_to_cpu(sb->data_offset));
-               printf("Size is %llu\n", (unsigned long long)
-                      __le64_to_cpu(sb->data_size));
+       } else if (strcmp(update, "revert-reshape") == 0) {
+               rv = -2;
+               if (!(sb->feature_map & __cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE)))
+                       pr_err("No active reshape to revert on %s\n",
+                              devname);
+               else {
+                       __u32 temp;
+                       unsigned long long reshape_sectors;
+                       long reshape_chunk;
+                       rv = 0;
+                       /* reshape_position is a little messy.
+                        * Its value must be a multiple of the larger
+                        * chunk size, and of the "after" data disks.
+                        * So when reverting we need to change it to
+                        * be a multiple of the new "after" data disks,
+                        * which is the old "before".
+                        * If it isn't already a multiple of 'before',
+                        * the only thing we could do would be
+                        * copy some block around on the disks, which
+                        * is easy to get wrong.
+                        * So we reject a revert-reshape unless the
+                        * alignment is good.
+                        */
+                       if (__le32_to_cpu(sb->level) >= 4 &&
+                           __le32_to_cpu(sb->level) <= 6) {
+                               reshape_sectors = __le64_to_cpu(sb->reshape_position);
+                               reshape_chunk = __le32_to_cpu(sb->new_chunk);
+                               reshape_chunk *= __le32_to_cpu(sb->raid_disks) - __le32_to_cpu(sb->delta_disks) -
+                                       (__le32_to_cpu(sb->level)==6 ? 2 : 1);
+                               if (reshape_sectors % reshape_chunk) {
+                                       pr_err("Reshape position is not suitably aligned.\n");
+                                       pr_err("Try normal assembly and stop again\n");
+                                       return -2;
+                               }
+                       }
+                       sb->raid_disks = __cpu_to_le32(__le32_to_cpu(sb->raid_disks) -
+                                                      __le32_to_cpu(sb->delta_disks));
+                       if (sb->delta_disks == 0)
+                               sb->feature_map ^= __cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
+                       else
+                               sb->delta_disks = __cpu_to_le32(-__le32_to_cpu(sb->delta_disks));
+
+                       temp = sb->new_layout;
+                       sb->new_layout = sb->layout;
+                       sb->layout = temp;
+
+                       temp = sb->new_chunk;
+                       sb->new_chunk = sb->chunksize;
+                       sb->chunksize = temp;
+
+                       if (sb->feature_map & __cpu_to_le32(MD_FEATURE_NEW_OFFSET)) {
+                               long offset_delta = (int32_t)__le32_to_cpu(sb->new_offset);
+                               sb->data_offset = __cpu_to_le64(__le64_to_cpu(sb->data_offset) + offset_delta);
+                               sb->new_offset = __cpu_to_le32(-offset_delta);
+                               sb->data_size = __cpu_to_le64(__le64_to_cpu(sb->data_size) - offset_delta);
+                       }
+               }
        } else if (strcmp(update, "_reshape_progress")==0)
                sb->reshape_position = __cpu_to_le64(info->reshape_progress);
        else if (strcmp(update, "writemostly")==0)
@@ -1029,7 +1328,7 @@ static int init_super1(struct supertype *st, mdu_array_info_t *info,
        int sbsize;
 
        if (posix_memalign((void**)&sb, 4096, SUPER1_SIZE) != 0) {
-               pr_err("%s could not allocate superblock\n", __func__);
+               pr_err("could not allocate superblock\n");
                return 0;
        }
        memset(sb, 0, SUPER1_SIZE);
@@ -1193,8 +1492,6 @@ static int store_super1(struct supertype *st, int fd)
                return -EINVAL;
        }
 
-
-
        if (sb_offset != __le64_to_cpu(sb->super_offset) &&
            0 != __le64_to_cpu(sb->super_offset)
                ) {
@@ -1250,14 +1547,13 @@ static int write_init_super1(struct supertype *st)
        int rfd;
        int rv = 0;
        unsigned long long bm_space;
-       unsigned long long reserved;
        struct devinfo *di;
        unsigned long long dsize, array_size;
-       unsigned long long sb_offset, headroom;
+       unsigned long long sb_offset;
        unsigned long long data_offset;
 
        for (di = st->info; di; di = di->next) {
-               if (di->disk.state == 1)
+               if (di->disk.state & (1 << MD_DISK_FAULTY))
                        continue;
                if (di->fd < 0)
                        continue;
@@ -1282,7 +1578,7 @@ static int write_init_super1(struct supertype *st)
                sb->events = 0;
 
                refst = dup_super(st);
-               if (load_super1(refst, di->fd, NULL)==0) {
+               if (load_super1(refst, di->fd, NULL)==0) {
                        struct mdp_superblock_1 *refsb = refst->sb;
 
                        memcpy(sb->device_uuid, refsb->device_uuid, 16);
@@ -1312,7 +1608,6 @@ static int write_init_super1(struct supertype *st)
                        goto error_out;
                }
 
-
                /*
                 * Calculate the position of the superblock.
                 * It is always aligned to a 4K boundary and
@@ -1320,35 +1615,25 @@ static int write_init_super1(struct supertype *st)
                 * 0: At least 8K, but less than 12K, from end of device
                 * 1: At start of device
                 * 2: 4K from start of device.
-                * Depending on the array size, we might leave extra space
-                * for a bitmap.
-                * Also leave 4K for bad-block log.
+                * data_offset has already been set.
                 */
                array_size = __le64_to_cpu(sb->size);
                /* work out how much space we left for a bitmap,
                 * Add 8 sectors for bad block log */
                bm_space = choose_bm_space(array_size) + 8;
 
-               /* We try to leave 0.1% at the start for reshape
-                * operations, but limit this to 128Meg (0.1% of 10Gig)
-                * which is plenty for efficient reshapes
-                * However we make it at least 2 chunks as one chunk
-                * is minimum needed for reshape.
-                */
-               headroom = 128 * 1024 * 2;
-               while  (headroom << 10 > array_size &&
-                       headroom/2 >= __le32_to_cpu(sb->chunksize) * 2)
-                       headroom >>= 1;
-
                data_offset = di->data_offset;
+               if (data_offset == INVALID_SECTORS)
+                       data_offset = st->data_offset;
                switch(st->minor_version) {
                case 0:
+                       if (data_offset == INVALID_SECTORS)
+                               data_offset = 0;
                        sb_offset = dsize;
                        sb_offset -= 8*2;
                        sb_offset &= ~(4*2-1);
+                       sb->data_offset = __cpu_to_le64(data_offset);
                        sb->super_offset = __cpu_to_le64(sb_offset);
-                       if (data_offset == INVALID_SECTORS)
-                               sb->data_offset = 0;
                        if (sb_offset < array_size + bm_space)
                                bm_space = sb_offset - array_size;
                        sb->data_size = __cpu_to_le64(sb_offset - bm_space);
@@ -1359,64 +1644,37 @@ static int write_init_super1(struct supertype *st)
                        break;
                case 1:
                        sb->super_offset = __cpu_to_le64(0);
-                       if (data_offset == INVALID_SECTORS) {
-                               reserved = bm_space + 4*2;
-                               if (reserved < headroom)
-                                       reserved = headroom;
-                               if (reserved + array_size > dsize)
-                                       reserved = dsize - array_size;
-                               /* Try for multiple of 1Meg so it is nicely aligned */
-                               #define ONE_MEG (2*1024)
-                               if (reserved > ONE_MEG)
-                                       reserved = (reserved/ONE_MEG) * ONE_MEG;
-
-                               /* force 4K alignment */
-                               reserved &= ~7ULL;
-
-                       } else
-                               reserved = data_offset;
-
-                       sb->data_offset = __cpu_to_le64(reserved);
-                       sb->data_size = __cpu_to_le64(dsize - reserved);
-                       if (reserved >= 16) {
+                       if (data_offset == INVALID_SECTORS)
+                               data_offset = 16;
+
+                       sb->data_offset = __cpu_to_le64(data_offset);
+                       sb->data_size = __cpu_to_le64(dsize - data_offset);
+                       if (data_offset >= 8 + 32*2 + 8) {
+                               sb->bblog_size = __cpu_to_le16(8);
+                               sb->bblog_offset = __cpu_to_le32(8 + 32*2);
+                       } else if (data_offset >= 16) {
                                sb->bblog_size = __cpu_to_le16(8);
-                               sb->bblog_offset = __cpu_to_le32(reserved-8);
+                               sb->bblog_offset = __cpu_to_le32(data_offset-8);
                        }
                        break;
                case 2:
                        sb_offset = 4*2;
-                       sb->super_offset = __cpu_to_le64(4*2);
-                       if (data_offset == INVALID_SECTORS) {
-                               if (4*2 + 4*2 + bm_space + array_size
-                                   > dsize)
-                                       bm_space = dsize - array_size
-                                               - 4*2 - 4*2;
-
-                               reserved = bm_space + 4*2 + 4*2;
-                               if (reserved < headroom)
-                                       reserved = headroom;
-                               if (reserved + array_size > dsize)
-                                       reserved = dsize - array_size;
-                               /* Try for multiple of 1Meg so it is nicely aligned */
-                               #define ONE_MEG (2*1024)
-                               if (reserved > ONE_MEG)
-                                       reserved = (reserved/ONE_MEG) * ONE_MEG;
-
-                               /* force 4K alignment */
-                               reserved &= ~7ULL;
-
-                       } else
-                               reserved = data_offset;
-
-                       sb->data_offset = __cpu_to_le64(reserved);
-                       sb->data_size = __cpu_to_le64(dsize - reserved);
-                       if (reserved >= 16+16) {
+                       sb->super_offset = __cpu_to_le64(sb_offset);
+                       if (data_offset == INVALID_SECTORS)
+                               data_offset = 24;
+
+                       sb->data_offset = __cpu_to_le64(data_offset);
+                       sb->data_size = __cpu_to_le64(dsize - data_offset);
+                       if (data_offset >= 16 + 32*2 + 8) {
+                               sb->bblog_size = __cpu_to_le16(8);
+                               sb->bblog_offset = __cpu_to_le32(8 + 32*2);
+                       } else if (data_offset >= 16+16) {
                                sb->bblog_size = __cpu_to_le16(8);
                                /* '8' sectors for the bblog, and another '8'
                                 * because we want offset from superblock, not
                                 * start of device.
                                 */
-                               sb->bblog_offset = __cpu_to_le32(reserved-8-8);
+                               sb->bblog_offset = __cpu_to_le32(data_offset-8-8);
                        }
                        break;
                default:
@@ -1426,6 +1684,10 @@ static int write_init_super1(struct supertype *st)
                        rv = -EINVAL;
                        goto out;
                }
+               if (conf_get_create_info()->bblist == 0) {
+                       sb->bblog_size = 0;
+                       sb->bblog_offset = 0;
+               }
 
                sb->sb_csum = calc_sb_1_csum(sb);
                rv = store_super1(st, di->fd);
@@ -1464,7 +1726,7 @@ static int compare_super1(struct supertype *st, struct supertype *tst)
 
        if (!first) {
                if (posix_memalign((void**)&first, 4096, SUPER1_SIZE) != 0) {
-                       pr_err("%s could not allocate superblock\n", __func__);
+                       pr_err("could not allocate superblock\n");
                        return 1;
                }
                memcpy(first, second, SUPER1_SIZE);
@@ -1567,9 +1829,6 @@ static int load_super1(struct supertype *st, int fd, char *devname)
                return -EINVAL;
        }
 
-       ioctl(fd, BLKFLSBUF, 0); /* make sure we read current data */
-
-
        if (lseek64(fd, sb_offset << 9, 0)< 0LL) {
                if (devname)
                        pr_err("Cannot seek to superblock on %s: %s\n",
@@ -1578,8 +1837,7 @@ static int load_super1(struct supertype *st, int fd, char *devname)
        }
 
        if (posix_memalign((void**)&super, 4096, SUPER1_SIZE) != 0) {
-               pr_err("%s could not allocate superblock\n",
-                       __func__);
+               pr_err("could not allocate superblock\n");
                return 1;
        }
 
@@ -1619,6 +1877,8 @@ static int load_super1(struct supertype *st, int fd, char *devname)
 
        misc = (struct misc_dev_info*) (((char*)super)+MAX_SB_SIZE+BM_SUPER_SIZE);
        misc->device_size = dsize;
+       if (st->data_offset == INVALID_SECTORS)
+               st->data_offset = __le64_to_cpu(super->data_offset);
 
        /* Now check on the bitmap superblock */
        if ((__le32_to_cpu(super->feature_map)&MD_FEATURE_BITMAP_OFFSET) == 0)
@@ -1643,15 +1903,15 @@ static int load_super1(struct supertype *st, int fd, char *devname)
        return 0;
 }
 
-
 static struct supertype *match_metadata_desc1(char *arg)
 {
        struct supertype *st = xcalloc(1, sizeof(*st));
 
-       st->container_dev = NoMdDev;
+       st->container_devnm[0] = 0;
        st->ss = &super1;
        st->max_devs = MAX_DEVS;
        st->sb = NULL;
+       st->data_offset = INVALID_SECTORS;
        /* leading zeros can be safely ignored.  --detail generates them. */
        while (*arg == '0')
                arg++;
@@ -1688,19 +1948,17 @@ static struct supertype *match_metadata_desc1(char *arg)
  * superblock type st, and reserving 'reserve' sectors for
  * a possible bitmap
  */
-static __u64 _avail_size1(struct supertype *st, __u64 devsize,
-                         unsigned long long data_offset, int chunksize)
+static __u64 avail_size1(struct supertype *st, __u64 devsize,
+                        unsigned long long data_offset)
 {
        struct mdp_superblock_1 *super = st->sb;
        int bmspace = 0;
+       int bbspace = 0;
        if (devsize < 24)
                return 0;
 
-       if (super == NULL)
-               /* creating:  allow suitable space for bitmap */
-               bmspace = choose_bm_space(devsize);
 #ifndef MDASSEMBLE
-       else if (__le32_to_cpu(super->feature_map)&MD_FEATURE_BITMAP_OFFSET) {
+       if (__le32_to_cpu(super->feature_map)&MD_FEATURE_BITMAP_OFFSET) {
                /* hot-add. allow for actual size of bitmap */
                struct bitmap_super_s *bsb;
                bsb = (struct bitmap_super_s *)(((char*)super)+MAX_SB_SIZE);
@@ -1708,20 +1966,20 @@ static __u64 _avail_size1(struct supertype *st, __u64 devsize,
        }
 #endif
        /* Allow space for bad block log */
-       if (super && super->bblog_size)
-               devsize -= __le16_to_cpu(super->bblog_size);
-       else
-               devsize -= 8;
-
+       if (super->bblog_size)
+               bbspace = __le16_to_cpu(super->bblog_size);
 
        if (st->minor_version < 0)
                /* not specified, so time to set default */
                st->minor_version = 2;
 
+       if (data_offset == INVALID_SECTORS)
+               data_offset = st->data_offset;
+
        if (data_offset != INVALID_SECTORS)
                switch(st->minor_version) {
                case 0:
-                       return devsize - data_offset - 8*2;
+                       return devsize - data_offset - 8*2 - bbspace;
                case 1:
                case 2:
                        return devsize - data_offset;
@@ -1731,36 +1989,19 @@ static __u64 _avail_size1(struct supertype *st, __u64 devsize,
 
        devsize -= bmspace;
 
-       if (super == NULL && st->minor_version > 0) {
-               /* haven't committed to a size yet, so allow some
-                * slack for space for reshape.
-                * Limit slack to 128M, but aim for about 0.1%
-                */
-               unsigned long long headroom = 128*1024*2;
-               while ((headroom << 10) > devsize &&
-                      (chunksize == 0 ||
-                       headroom / 2 >= ((unsigned)chunksize*2)*2))
-                       headroom >>= 1;
-               devsize -= headroom;
-       }
        switch(st->minor_version) {
        case 0:
                /* at end */
-               return ((devsize - 8*2 ) & ~(4*2-1));
+               return ((devsize - 8*2 - bbspace ) & ~(4*2-1));
        case 1:
                /* at start, 4K for superblock and possible bitmap */
-               return devsize - 4*2;
+               return devsize - 4*2 - bbspace;
        case 2:
                /* 4k from start, 4K for superblock and possible bitmap */
-               return devsize - (4+4)*2;
+               return devsize - (4+4)*2 - bbspace;
        }
        return 0;
 }
-static __u64 avail_size1(struct supertype *st, __u64 devsize,
-                        unsigned long long data_offset)
-{
-       return _avail_size1(st, devsize, data_offset, 0);
-}
 
 static int
 add_internal_bitmap1(struct supertype *st,
@@ -1789,7 +2030,6 @@ add_internal_bitmap1(struct supertype *st,
        bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MAX_SB_SIZE);
        int uuid[4];
 
-
        if (__le64_to_cpu(sb->data_size) == 0)
                /* Must be creating the array, else data_size would be non-zero */
                creating = 1;
@@ -1803,8 +2043,8 @@ add_internal_bitmap1(struct supertype *st,
                         * been left.
                         */
                        offset = 0;
-                       room = choose_bm_space(__le64_to_cpu(sb->size));
                        bbl_size = 8;
+                       room = choose_bm_space(__le64_to_cpu(sb->size)) + bbl_size;
                } else {
                        room = __le64_to_cpu(sb->super_offset)
                                - __le64_to_cpu(sb->data_offset)
@@ -1830,8 +2070,8 @@ add_internal_bitmap1(struct supertype *st,
        case 2: /* between superblock and data */
                if (creating) {
                        offset = 4*2;
-                       room = choose_bm_space(__le64_to_cpu(sb->size));
                        bbl_size = 8;
+                       room = choose_bm_space(__le64_to_cpu(sb->size)) + bbl_size;
                } else {
                        room = __le64_to_cpu(sb->data_offset)
                                - __le64_to_cpu(sb->super_offset);
@@ -1860,6 +2100,10 @@ add_internal_bitmap1(struct supertype *st,
                /* Limit to 128K of bitmap when chunk size not requested */
                room = 128*2;
 
+       if (room <= 1)
+               /* No room for a bitmap */
+               return 0;
+
        max_bits = (room * 512 - sizeof(bitmap_super_t)) * 8;
 
        min_chunk = 4096; /* sub-page chunks don't work yet.. */
@@ -1992,7 +2236,9 @@ static int validate_geometry1(struct supertype *st, int level,
                              char *subdev, unsigned long long *freesize,
                              int verbose)
 {
-       unsigned long long ldsize;
+       unsigned long long ldsize, devsize;
+       int bmspace;
+       unsigned long long headroom;
        int fd;
 
        if (level == LEVEL_CONTAINER) {
@@ -2000,12 +2246,16 @@ static int validate_geometry1(struct supertype *st, int level,
                        pr_err("1.x metadata does not support containers\n");
                return 0;
        }
-       if (chunk && *chunk == UnSet)
+       if (*chunk == UnSet)
                *chunk = DEFAULT_CHUNK;
 
        if (!subdev)
                return 1;
 
+       if (st->minor_version < 0)
+               /* not specified, so time to set default */
+               st->minor_version = 2;
+
        fd = open(subdev, O_RDONLY|O_EXCL, 0);
        if (fd < 0) {
                if (verbose)
@@ -2020,11 +2270,118 @@ static int validate_geometry1(struct supertype *st, int level,
        }
        close(fd);
 
-       *freesize = _avail_size1(st, ldsize >> 9, data_offset, *chunk);
+       devsize = ldsize >> 9;
+       if (devsize < 24) {
+               *freesize = 0;
+               return 0;
+       }
+
+       /* creating:  allow suitable space for bitmap */
+       bmspace = choose_bm_space(devsize);
+
+       if (data_offset == INVALID_SECTORS)
+               data_offset = st->data_offset;
+       if (data_offset == INVALID_SECTORS)
+               switch (st->minor_version) {
+               case 0:
+                       data_offset = 0;
+                       break;
+               case 1:
+               case 2:
+                       /* Choose data offset appropriate for this device
+                        * and use as default for whole array.
+                        * The data_offset must allow for bitmap space
+                        * and base metadata, should allow for some headroom
+                        * for reshape, and should be rounded to multiple
+                        * of 1M.
+                        * Headroom is limited to 128M, but aim for about 0.1%
+                        */
+                       headroom = 128*1024*2;
+                       while ((headroom << 10) > devsize &&
+                              (*chunk == 0 ||
+                               headroom / 2 >= ((unsigned)(*chunk)*2)*2))
+                               headroom >>= 1;
+                       data_offset = 12*2 + bmspace + headroom;
+                       #define ONE_MEG (2*1024)
+                       if (data_offset > ONE_MEG)
+                               data_offset = (data_offset / ONE_MEG) * ONE_MEG;
+                       break;
+               }
+       if (st->data_offset == INVALID_SECTORS)
+               st->data_offset = data_offset;
+       switch(st->minor_version) {
+       case 0: /* metadata at end.  Round down and subtract space to reserve */
+               devsize = (devsize & ~(4ULL*2-1));
+               /* space for metadata, bblog, bitmap */
+               devsize -= 8*2 + 8 + bmspace;
+               break;
+       case 1:
+       case 2:
+               devsize -= data_offset;
+               break;
+       }
+       *freesize = devsize;
        return 1;
 }
 #endif /* MDASSEMBLE */
 
+void *super1_make_v0(struct supertype *st, struct mdinfo *info, mdp_super_t *sb0)
+{
+       /* Create a v1.0 superblock based on 'info'*/
+       void *ret;
+       struct mdp_superblock_1 *sb;
+       int i;
+       int rfd;
+       unsigned long long offset;
+
+       if (posix_memalign(&ret, 4096, 1024) != 0)
+               return NULL;
+       sb = ret;
+       memset(ret, 0, 1024);
+       sb->magic = __cpu_to_le32(MD_SB_MAGIC);
+       sb->major_version = __cpu_to_le32(1);
+
+       copy_uuid(sb->set_uuid, info->uuid, super1.swapuuid);
+       sprintf(sb->set_name, "%d", sb0->md_minor);
+       sb->ctime = __cpu_to_le32(info->array.ctime+1);
+       sb->level = __cpu_to_le32(info->array.level);
+       sb->layout = __cpu_to_le32(info->array.layout);
+       sb->size = __cpu_to_le64(info->component_size);
+       sb->chunksize = __cpu_to_le32(info->array.chunk_size/512);
+       sb->raid_disks = __cpu_to_le32(info->array.raid_disks);
+       if (info->array.level > 0)
+               sb->data_size = sb->size;
+       else
+               sb->data_size = st->ss->avail_size(st, st->devsize/512, 0);
+       sb->resync_offset = MaxSector;
+       sb->max_dev = __cpu_to_le32(MD_SB_DISKS);
+       sb->dev_number = __cpu_to_le32(info->disk.number);
+       sb->utime = __cpu_to_le64(info->array.utime);
+
+       offset = st->devsize/512 - 8*2;
+       offset &= ~(4*2-1);
+       sb->super_offset = __cpu_to_le64(offset);
+       //*(__u64*)(st->other + 128 + 8 + 8) = __cpu_to_le64(offset);
+
+       if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
+           read(rfd, sb->device_uuid, 16) != 16) {
+               __u32 r[4] = {random(), random(), random(), random()};
+               memcpy(sb->device_uuid, r, 16);
+       }
+       if (rfd >= 0)
+               close(rfd);
+
+       for (i = 0; i < MD_SB_DISKS; i++) {
+               int state = sb0->disks[i].state;
+               sb->dev_roles[i] = 0xFFFF;
+               if ((state & (1<<MD_DISK_SYNC)) &&
+                   !(state & (1<<MD_DISK_FAULTY)))
+                       sb->dev_roles[i] = __cpu_to_le16(sb0->disks[i].raid_disk);
+       }
+       sb->sb_csum = calc_sb_1_csum(sb);
+       return ret;
+}
+
 struct superswitch super1 = {
 #ifndef MDASSEMBLE
        .examine_super = examine_super1,
@@ -2036,6 +2393,8 @@ struct superswitch super1 = {
        .write_init_super = write_init_super1,
        .validate_geometry = validate_geometry1,
        .add_to_super = add_to_super1,
+       .examine_badblocks = examine_badblocks_super1,
+       .copy_metadata = copy_metadata1,
 #endif
        .match_home = match_home1,
        .uuid_from_super = uuid_from_super1,