/*
* mdadm - manage Linux "md" devices aka RAID arrays.
*
- * Copyright (C) 2001-2006 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
*
*
* This program is free software; you can redistribute it and/or modify
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Author: Neil Brown
- * Email: <neilb@cse.unsw.edu.au>
- * Paper: Neil Brown
- * School of Computer Science and Engineering
- * The University of New South Wales
- * Sydney, 2052
- * Australia
+ * Email: <neilb@suse.de>
*/
#include "mdadm.h"
__u64 utime; /* 40 bits second, 24 bits microseconds */
__u64 events; /* incremented when superblock updated */
__u64 resync_offset; /* data before this offset (from data_offset) known to be in sync */
- __u32 sb_csum; /* checksum upto devs[max_dev] */
- __u32 max_dev; /* size of devs[] array to consider */
+ __u32 sb_csum; /* checksum upto dev_roles[max_dev] */
+ __u32 max_dev; /* size of dev_roles[] array to consider */
__u8 pad3[64-32]; /* set to 0 when writing */
/* device state information. Indexed by dev_number.
{
struct mdp_superblock_1 *sb = st->sb;
time_t atime;
- int d;
+ unsigned int d;
int role;
+ int delta_extra = 0;
int i;
char *c;
int l = homehost ? strlen(homehost) : 0;
printf(" Avail Dev Size : %llu%s\n",
(unsigned long long)__le64_to_cpu(sb->data_size),
human_size(__le64_to_cpu(sb->data_size)<<9));
- if (__le32_to_cpu(sb->level) >= 0) {
+ if (__le32_to_cpu(sb->level) > 0) {
int ddsks=0;
switch(__le32_to_cpu(sb->level)) {
case 1: ddsks=1;break;
human_size(__le64_to_cpu(sb->reshape_position)<<9));
if (__le32_to_cpu(sb->delta_disks)) {
printf(" Delta Devices : %d", __le32_to_cpu(sb->delta_disks));
- if (__le32_to_cpu(sb->delta_disks))
- printf(" (%d->%d)\n",
- __le32_to_cpu(sb->raid_disks)-__le32_to_cpu(sb->delta_disks),
- __le32_to_cpu(sb->raid_disks));
- else
- printf(" (%d->%d)\n", __le32_to_cpu(sb->raid_disks),
- __le32_to_cpu(sb->raid_disks)+__le32_to_cpu(sb->delta_disks));
+ printf(" (%d->%d)\n",
+ __le32_to_cpu(sb->raid_disks)-__le32_to_cpu(sb->delta_disks),
+ __le32_to_cpu(sb->raid_disks));
+ if ((int)__le32_to_cpu(sb->delta_disks) < 0)
+ delta_extra = -__le32_to_cpu(sb->delta_disks);
}
if (__le32_to_cpu(sb->new_level) != __le32_to_cpu(sb->level)) {
c = map_num(pers, __le32_to_cpu(sb->new_level));
c = map_num(r5layout, __le32_to_cpu(sb->new_layout));
printf(" New Layout : %s\n", c?c:"-unknown-");
}
+ if (__le32_to_cpu(sb->level) == 6) {
+ c = map_num(r6layout, __le32_to_cpu(sb->new_layout));
+ printf(" New Layout : %s\n", c?c:"-unknown-");
+ }
if (__le32_to_cpu(sb->level) == 10) {
printf(" New Layout :");
print_r10_layout(__le32_to_cpu(sb->new_layout));
c = map_num(r5layout, __le32_to_cpu(sb->layout));
printf(" Layout : %s\n", c?c:"-unknown-");
}
+ if (__le32_to_cpu(sb->level) == 6) {
+ c = map_num(r6layout, __le32_to_cpu(sb->layout));
+ printf(" Layout : %s\n", c?c:"-unknown-");
+ }
if (__le32_to_cpu(sb->level) == 10) {
int lo = __le32_to_cpu(sb->layout);
printf(" Layout :");
#endif
printf(" Device Role : ");
d = __le32_to_cpu(sb->dev_number);
- if (d < sb->raid_disks)
+ if (d < __le32_to_cpu(sb->max_dev))
role = __le16_to_cpu(sb->dev_roles[d]);
else
role = 0xFFFF;
printf("Active device %d\n", role);
printf(" Array State : ");
- for (d=0; d<__le32_to_cpu(sb->raid_disks); d++) {
+ for (d=0; d<__le32_to_cpu(sb->raid_disks) + delta_extra; d++) {
int cnt = 0;
int me = 0;
- int i;
+ unsigned int i;
for (i=0; i< __le32_to_cpu(sb->max_dev); i++) {
- int role = __le16_to_cpu(sb->dev_roles[i]);
+ unsigned int role = __le16_to_cpu(sb->dev_roles[i]);
if (role == d) {
if (i == __le32_to_cpu(sb->dev_number))
me = 1;
cuuid[i] = super->set_uuid[i];
}
-static void getinfo_super1(struct supertype *st, struct mdinfo *info)
+static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
{
struct mdp_superblock_1 *sb = st->sb;
int working = 0;
- int i;
- int role;
+ unsigned int i;
+ unsigned int role;
+ unsigned int map_disks = info->array.raid_disks;
info->array.major_version = 1;
info->array.minor_version = st->minor_version;
info->disk.raid_disk = -1;
switch(role) {
case 0xFFFF:
- info->disk.state = 2; /* spare: ACTIVE, not sync, not faulty */
+ info->disk.state = 0; /* spare: not active, not sync, not faulty */
break;
case 0xFFFE:
info->disk.state = 1; /* faulty */
strncpy(info->name, sb->set_name, 32);
info->name[32] = 0;
+ if (sb->feature_map & __le32_to_cpu(MD_FEATURE_RECOVERY_OFFSET))
+ info->recovery_start = __le32_to_cpu(sb->recovery_offset);
+ else
+ info->recovery_start = MaxSector;
+
if (sb->feature_map & __le32_to_cpu(MD_FEATURE_RESHAPE_ACTIVE)) {
info->reshape_active = 1;
info->reshape_progress = __le64_to_cpu(sb->reshape_position);
info->delta_disks = __le32_to_cpu(sb->delta_disks);
info->new_layout = __le32_to_cpu(sb->new_layout);
info->new_chunk = __le32_to_cpu(sb->new_chunk)<<9;
+ if (info->delta_disks < 0)
+ info->array.raid_disks -= info->delta_disks;
} else
info->reshape_active = 0;
- for (i=0; i< __le32_to_cpu(sb->max_dev); i++) {
+ if (map)
+ for (i=0; i<map_disks; i++)
+ map[i] = 0;
+ for (i = 0; i < __le32_to_cpu(sb->max_dev); i++) {
role = __le16_to_cpu(sb->dev_roles[i]);
- if (/*role == 0xFFFF || */role < info->array.raid_disks)
+ if (/*role == 0xFFFF || */role < (unsigned) info->array.raid_disks) {
working++;
+ if (map && role < map_disks)
+ map[role] = 1;
+ }
}
info->array.working_disks = working;
}
+/* Return array info for a (hypothetical) container member.
+ * v1.x metadata does not support containers, so only the bare
+ * query (subarray == NULL) can succeed; any named subarray
+ * lookup fails with NULL.
+ * Caller owns the returned mdinfo and must free() it.
+ */
+static struct mdinfo *container_content1(struct supertype *st, char *subarray)
+{
+	struct mdinfo *info;
+
+	if (subarray)
+		return NULL;
+
+	info = malloc(sizeof(*info));
+	if (!info)
+		return NULL;
+	/* getinfo_super1 reads info->array.raid_disks before setting it
+	 * (to size its optional role map), so start from a zeroed struct
+	 * rather than raw malloc'd memory.
+	 */
+	memset(info, 0, sizeof(*info));
+	getinfo_super1(st, info, NULL);
+	return info;
+}
+
static int update_super1(struct supertype *st, struct mdinfo *info,
char *update,
char *devname, int verbose,
int uuid_set, char *homehost)
{
- /* NOTE: for 'assemble' and 'force' we need to return non-zero if any change was made.
- * For others, the return value is ignored.
+ /* NOTE: for 'assemble' and 'force' we need to return non-zero
+ * if any change was made. For others, the return value is
+ * ignored.
*/
int rv = 0;
struct mdp_superblock_1 *sb = st->sb;
if (sb->events != __cpu_to_le64(info->events))
rv = 1;
sb->events = __cpu_to_le64(info->events);
- }
- if (strcmp(update, "force-array")==0) {
+ } else if (strcmp(update, "force-array")==0) {
/* Degraded array and 'force' requests to
* maybe need to mark it 'clean'.
*/
switch(__le32_to_cpu(sb->level)) {
case 5: case 4: case 6:
/* need to force clean */
- if (sb->resync_offset != ~0ULL)
+ if (sb->resync_offset != MaxSector)
rv = 1;
- sb->resync_offset = ~0ULL;
+ sb->resync_offset = MaxSector;
}
- }
- if (strcmp(update, "assemble")==0) {
+ } else if (strcmp(update, "assemble")==0) {
int d = info->disk.number;
int want;
if (info->disk.state == 6)
- want = __cpu_to_le32(info->disk.raid_disk);
+ want = info->disk.raid_disk;
else
want = 0xFFFF;
- if (sb->dev_roles[d] != want) {
- sb->dev_roles[d] = want;
+ if (sb->dev_roles[d] != __cpu_to_le16(want)) {
+ sb->dev_roles[d] = __cpu_to_le16(want);
rv = 1;
}
- }
- if (strcmp(update, "linear-grow-new") == 0) {
- int i;
+ } else if (strcmp(update, "linear-grow-new") == 0) {
+ unsigned int i;
int rfd, fd;
- int max = __le32_to_cpu(sb->max_dev);
+ unsigned int max = __le32_to_cpu(sb->max_dev);
for (i=0 ; i < max ; i++)
if (__le16_to_cpu(sb->dev_roles[i]) >= 0xfffe)
__u32 r[4] = {random(), random(), random(), random()};
memcpy(sb->device_uuid, r, 16);
}
+ if (rfd >= 0)
+ close(rfd);
sb->dev_roles[i] =
__cpu_to_le16(info->disk.raid_disk);
ds - __le64_to_cpu(sb->data_offset));
}
}
- }
- if (strcmp(update, "linear-grow-update") == 0) {
+ } else if (strcmp(update, "linear-grow-update") == 0) {
sb->raid_disks = __cpu_to_le32(info->array.raid_disks);
sb->dev_roles[info->disk.number] =
__cpu_to_le16(info->disk.raid_disk);
- }
- if (strcmp(update, "resync") == 0) {
+ } else if (strcmp(update, "resync") == 0) {
/* make sure resync happens */
sb->resync_offset = 0ULL;
- }
- if (strcmp(update, "uuid") == 0) {
+ } else if (strcmp(update, "uuid") == 0) {
copy_uuid(sb->set_uuid, info->uuid, super1.swapuuid);
if (__le32_to_cpu(sb->feature_map)&MD_FEATURE_BITMAP_OFFSET) {
bm = (struct bitmap_super_s*)(st->sb+1024);
memcpy(bm->uuid, sb->set_uuid, 16);
}
- }
- if (strcmp(update, "homehost") == 0 &&
+ } else if (strcmp(update, "no-bitmap") == 0) {
+ sb->feature_map &= ~__cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
+ } else if (strcmp(update, "homehost") == 0 &&
homehost) {
char *c;
update = "name";
else
strncpy(info->name, sb->set_name, 32);
info->name[32] = 0;
- }
- if (strcmp(update, "name") == 0) {
+ } else if (strcmp(update, "name") == 0) {
if (info->name[0] == 0)
sprintf(info->name, "%d", info->array.md_minor);
memset(sb->set_name, 0, sizeof(sb->set_name));
strcat(sb->set_name, info->name);
} else
strcpy(sb->set_name, info->name);
- }
- if (strcmp(update, "devicesize") == 0 &&
+ } else if (strcmp(update, "devicesize") == 0 &&
__le64_to_cpu(sb->super_offset) <
__le64_to_cpu(sb->data_offset)) {
/* set data_size to device size less data_offset */
misc->device_size - __le64_to_cpu(sb->data_offset));
printf("Size is %llu\n", (unsigned long long)
__le64_to_cpu(sb->data_size));
- }
- if (strcmp(update, "_reshape_progress")==0)
+ } else if (strcmp(update, "_reshape_progress")==0)
sb->reshape_position = __cpu_to_le64(info->reshape_progress);
+ else
+ rv = -1;
sb->sb_csum = calc_sb_1_csum(sb);
return rv;
sb->utime = sb->ctime;
sb->events = __cpu_to_le64(1);
if (info->state & (1<<MD_SB_CLEAN))
- sb->resync_offset = ~0ULL;
+ sb->resync_offset = MaxSector;
else
sb->resync_offset = 0;
sb->max_dev = __cpu_to_le32((1024- sizeof(struct mdp_superblock_1))/
else
*rp = 0xfffe;
- if (dk->number >= __le32_to_cpu(sb->max_dev) &&
+ if (dk->number >= (int)__le32_to_cpu(sb->max_dev) &&
__le32_to_cpu(sb->max_dev) < 384)
sb->max_dev = __cpu_to_le32(dk->number+1);
{
/* if the device is bigger than 8Gig, save 64k for bitmap usage,
* if bigger than 200Gig, save 128k
+ * NOTE: result must be multiple of 4K else bad things happen
+ * on 4K-sector devices.
*/
if (devsize < 64*2) return 0;
if (devsize - 64*2 >= 200*1024*1024*2)
return 4*2;
}
+static void free_super1(struct supertype *st);
+
#ifndef MDASSEMBLE
static int write_init_super1(struct supertype *st)
{
struct mdp_superblock_1 *sb = st->sb;
- struct supertype refst;
+ struct supertype *refst;
int rfd;
int rv = 0;
- int bm_space;
+ unsigned long long bm_space;
+ unsigned long long reserved;
struct devinfo *di;
unsigned long long dsize, array_size;
- long long sb_offset;
+ unsigned long long sb_offset;
for (di = st->info; di && ! rv ; di = di->next) {
if (di->disk.state == 1)
if (di->fd < 0)
continue;
- Kill(di->devname, 0, 1, 1);
- Kill(di->devname, 0, 1, 1);
+ while (Kill(di->devname, NULL, 0, 1, 1) == 0)
+ ;
sb->dev_number = __cpu_to_le32(di->disk.number);
if (di->disk.state & (1<<MD_DISK_WRITEMOSTLY))
sb->events = 0;
- refst =*st;
- refst.sb = NULL;
- if (load_super1(&refst, di->fd, NULL)==0) {
- struct mdp_superblock_1 *refsb = refst.sb;
+ refst = dup_super(st);
+ if (load_super1(refst, di->fd, NULL)==0) {
+ struct mdp_superblock_1 *refsb = refst->sb;
memcpy(sb->device_uuid, refsb->device_uuid, 16);
if (memcmp(sb->set_uuid, refsb->set_uuid, 16)==0) {
if (get_linux_version() >= 2006018)
sb->dev_number = refsb->dev_number;
}
- free(refsb);
+ free_super1(refst);
}
+ free(refst);
if (!get_dev_size(di->fd, NULL, &dsize))
return 1;
sb_offset &= ~(4*2-1);
sb->super_offset = __cpu_to_le64(sb_offset);
sb->data_offset = __cpu_to_le64(0);
- if (sb_offset - bm_space < array_size)
- bm_space = sb_offset - array_size;
+ if (sb_offset < array_size + bm_space)
+ bm_space = sb_offset - array_size;
sb->data_size = __cpu_to_le64(sb_offset - bm_space);
break;
case 1:
sb->super_offset = __cpu_to_le64(0);
- if (4*2 + bm_space + __le64_to_cpu(sb->size) > dsize)
- bm_space = dsize - __le64_to_cpu(sb->size) -4*2;
- sb->data_offset = __cpu_to_le64(bm_space + 4*2);
- sb->data_size = __cpu_to_le64(dsize - bm_space - 4*2);
+ reserved = bm_space + 4*2;
+ /* Try for multiple of 1Meg so it is nicely aligned */
+ #define ONE_MEG (2*1024)
+ reserved = ((reserved + ONE_MEG-1)/ONE_MEG) * ONE_MEG;
+ if (reserved + __le64_to_cpu(sb->size) > dsize)
+ reserved = dsize - __le64_to_cpu(sb->size);
+ /* force 4K alignment */
+ reserved &= ~7ULL;
+
+ sb->data_offset = __cpu_to_le64(reserved);
+ sb->data_size = __cpu_to_le64(dsize - reserved);
break;
case 2:
sb_offset = 4*2;
> dsize)
bm_space = dsize - __le64_to_cpu(sb->size)
- 4*2 - 4*2;
- sb->data_offset = __cpu_to_le64(4*2 + 4*2 + bm_space);
- sb->data_size = __cpu_to_le64(dsize - 4*2 - 4*2
- - bm_space );
+
+ reserved = bm_space + 4*2 + 4*2;
+ /* Try for multiple of 1Meg so it is nicely aligned */
+ #define ONE_MEG (2*1024)
+ reserved = ((reserved + ONE_MEG-1)/ONE_MEG) * ONE_MEG;
+ if (reserved + __le64_to_cpu(sb->size) > dsize)
+ reserved = dsize - __le64_to_cpu(sb->size);
+ /* force 4K alignment */
+ reserved &= ~7ULL;
+
+ sb->data_offset = __cpu_to_le64(reserved);
+ sb->data_size = __cpu_to_le64(dsize - reserved);
break;
default:
return -EINVAL;
return 0;
}
-static void free_super1(struct supertype *st);
-
static int load_super1(struct supertype *st, int fd, char *devname)
{
unsigned long long dsize;
free_super1(st);
- if (st->subarray[0])
- return 1;
-
if (st->ss == NULL || st->minor_version == -1) {
int bestvers = -1;
struct supertype tst;
if (!st) return st;
memset(st, 0, sizeof(*st));
+ st->container_dev = NoMdDev;
st->ss = &super1;
st->max_devs = 384;
st->sb = NULL;
return st;
}
if (strcmp(arg, "1.1") == 0 ||
- strcmp(arg, "1.01") == 0) {
+ strcmp(arg, "1.01") == 0
+ ) {
st->minor_version = 1;
return st;
}
if (strcmp(arg, "1.2") == 0 ||
+#ifndef DEFAULT_OLD_METADATA /* ifdef in super0.c */
+ strcmp(arg, "default") == 0 ||
+#endif /* DEFAULT_OLD_METADATA */
strcmp(arg, "1.02") == 0) {
st->minor_version = 2;
return st;
}
#endif
+ if (st->minor_version < 0)
+ /* not specified, so time to set default */
+ st->minor_version = 2;
+ if (super == NULL && st->minor_version > 0) {
+ /* haven't committed to a size yet, so allow some
+ * slack for alignment of data_offset.
+ * We haven't access to device details so allow
+ * 1 Meg if bigger than 1Gig
+ */
+ if (devsize > 1024*1024*2)
+ devsize -= 1024*2;
+ }
switch(st->minor_version) {
- case -1: /* no specified. Now time to set default */
- st->minor_version = 0;
- /* FALL THROUGH */
case 0:
/* at end */
return ((devsize - 8*2 ) & ~(4*2-1));
unsigned long long max_bits;
unsigned long long min_chunk;
long offset;
- int chunk = *chunkp;
+ unsigned long long chunk = *chunkp;
int room = 0;
struct mdp_superblock_1 *sb = st->sb;
bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + 1024);
switch(st->minor_version) {
case 0:
- /* either 3K after the superblock, or some amount of space
- * before.
+ /* either 3K after the superblock (when hot-add),
+ * or some amount of space before.
*/
if (may_change) {
/* We are creating array, so we *know* how much room has
*/
offset = 0;
room = choose_bm_space(__le64_to_cpu(sb->size));
- if (room == 4*2) {
- /* make it 3K after the superblock */
- room = 3*2;
- offset = 2;
- }
} else {
room = __le64_to_cpu(sb->super_offset)
- __le64_to_cpu(sb->data_offset)
min_chunk *= 2;
bits = (bits+1)/2;
}
- if (chunk == UnSet)
+ if (chunk == UnSet) {
+ /* For practical purpose, 64Meg is a good
+ * default chunk size for internal bitmaps.
+ */
chunk = min_chunk;
- else if (chunk < min_chunk)
+ if (chunk < 64*1024*1024)
+ chunk = 64*1024*1024;
+ } else if (chunk < min_chunk)
return 0; /* chunk size too small */
if (chunk == 0) /* rounding problem */
return 0;
if (offset == 0) {
+ /* start bitmap on a 4K boundary with enough space for
+ * the bitmap
+ */
bits = (size*512) / chunk + 1;
- room = ((bits+7)/8 + sizeof(bitmap_super_t) +511)/512;
+ room = ((bits+7)/8 + sizeof(bitmap_super_t) +4095)/4096;
+ room *= 8; /* convert 4K blocks to sectors */
offset = -room;
}
{
if (st->sb)
free(st->sb);
+ while (st->info) {
+ struct devinfo *di = st->info;
+ st->info = di->next;
+ if (di->fd >= 0)
+ close(di->fd);
+ free(di);
+ }
st->sb = NULL;
}
#ifndef MDASSEMBLE
static int validate_geometry1(struct supertype *st, int level,
int layout, int raiddisks,
- int chunk, unsigned long long size,
+ int *chunk, unsigned long long size,
char *subdev, unsigned long long *freesize,
int verbose)
{
unsigned long long ldsize;
int fd;
- if (level == LEVEL_CONTAINER)
+ if (level == LEVEL_CONTAINER) {
+ if (verbose)
+ fprintf(stderr, Name ": 1.x metadata does not support containers\n");
return 0;
+ }
+ if (chunk && *chunk == UnSet)
+ *chunk = DEFAULT_CHUNK;
+
if (!subdev)
return 1;
.match_home = match_home1,
.uuid_from_super = uuid_from_super1,
.getinfo_super = getinfo_super1,
+ .container_content = container_content1,
.update_super = update_super1,
.init_super = init_super1,
.store_super = store_super1,