* RUN_ARRAY
*/
int mdfd;
- unsigned long long minsize=0, maxsize=0;
+ unsigned long long minsize = 0, maxsize = 0;
char *mindisc = NULL;
char *maxdisc = NULL;
- int dnum;
+ int dnum, raid_disk_num;
struct mddev_dev *dv;
- int fail=0, warn=0;
+ int fail = 0, warn = 0;
struct stat stb;
int first_missing = subdevs * 2;
int second_missing = subdevs * 2;
int insert_point = subdevs * 2; /* where to insert a missing drive */
int total_slots;
int pass;
- int vers;
int rv;
int bitmap_fd;
int have_container = 0;
char chosen_name[1024];
struct map_ent *map = NULL;
unsigned long long newsize;
+ mdu_array_info_t inf;
int major_num = BITMAP_MAJOR_HI;
+ if (s->bitmap_file && strcmp(s->bitmap_file, "clustered") == 0) {
+ major_num = BITMAP_MAJOR_CLUSTERED;
+ if (c->nodes <= 1) {
+ pr_err("At least 2 nodes are needed for cluster-md\n");
+ return 1;
+ }
+ }
memset(&info, 0, sizeof(info));
if (s->level == UnSet && st && st->ss->default_geometry)
/* If given a single device, it might be a container, and we can
* extract a device list from there
*/
- mdu_array_info_t inf;
int fd;
memset(&inf, 0, sizeof(inf));
fd = open(devlist->devname, O_RDONLY);
if (fd >= 0 &&
- ioctl(fd, GET_ARRAY_INFO, &inf) == 0 &&
- inf.raid_disks == 0) {
+ md_get_array_info(fd, &inf) == 0 && inf.raid_disks == 0) {
/* yep, looks like a container */
if (st) {
rv = st->ss->load_container(st, fd,
pr_err("This metadata type does not support spare disks at create time\n");
return 1;
}
- if (subdevs > s->raiddisks+s->sparedisks) {
+ if (subdevs > s->raiddisks+s->sparedisks+s->journaldisks) {
pr_err("You have listed more devices (%d) than are in the array(%d)!\n", subdevs, s->raiddisks+s->sparedisks);
return 1;
}
- if (!have_container && subdevs < s->raiddisks+s->sparedisks) {
+ if (!have_container && subdevs < s->raiddisks+s->sparedisks+s->journaldisks) {
pr_err("You haven't given enough devices (real or missing) to create this array\n");
return 1;
}
if (st && ! st->ss->validate_geometry(st, s->level, s->layout, s->raiddisks,
&s->chunk, s->size*2,
data_offset, NULL,
- &newsize, c->verbose>=0))
+ &newsize, s->consistency_policy,
+ c->verbose >= 0))
return 1;
if (s->chunk && s->chunk != UnSet) {
info.array.active_disks = 0;
info.array.working_disks = 0;
dnum = 0;
- for (dv = devlist; dv ; dv = dv->next)
+ for (dv = devlist; dv; dv = dv->next)
if (data_offset == VARIABLE_OFFSET)
dv->data_offset = INVALID_SECTORS;
else
int dfd;
char *doff;
- if (strcasecmp(dname, "missing")==0) {
+ if (strcasecmp(dname, "missing") == 0) {
if (first_missing > dnum)
first_missing = dnum;
if (second_missing > dnum && dnum > first_missing)
}
close(dfd);
info.array.working_disks++;
- if (dnum < s->raiddisks)
+ if (dnum < s->raiddisks && dv->disposition != 'j')
info.array.active_disks++;
if (st == NULL) {
struct createinfo *ci = conf_get_create_info();
*/
int i;
char *name = "default";
- for(i=0; !st && superlist[i]; i++) {
+ for(i = 0; !st && superlist[i]; i++) {
st = superlist[i]->match_metadata_desc(name);
if (!st)
continue;
st, s->level, s->layout, s->raiddisks,
&s->chunk, s->size*2,
dv->data_offset, dname,
- &freesize, c->verbose > 0)) {
+ &freesize, s->consistency_policy,
+ c->verbose > 0)) {
case -1: /* Not valid, message printed, and not
* worth checking any further */
exit(2);
&s->chunk, s->size*2,
dv->data_offset,
dname, &freesize,
+ s->consistency_policy,
c->verbose >= 0)) {
pr_err("%s is not suitable for this array.\n",
}
}
+ if (dv->disposition == 'j')
+ goto skip_size_check; /* skip write journal for size check */
+
freesize /= 2; /* convert to K */
if (s->chunk && s->chunk != UnSet) {
/* round to chunk size */
mindisc = dname;
minsize = freesize;
}
+ skip_size_check:
if (c->runstop != 1 || c->verbose >= 0) {
int fd = open(dname, O_RDONLY);
- if (fd <0 ) {
+ if (fd < 0) {
pr_err("Cannot open %s: %s\n",
dname, strerror(errno));
- fail=1;
+ fail = 1;
continue;
}
warn |= check_ext2(fd, dname);
st->minor_version >= 1)
/* metadata at front */
warn |= check_partitions(fd, dname, 0, 0);
- else if (s->level == 1 || s->level == LEVEL_CONTAINER
- || (s->level == 0 && s->raiddisks == 1))
+ else if (s->level == 1 || s->level == LEVEL_CONTAINER ||
+ (s->level == 0 && s->raiddisks == 1))
/* partitions could be meaningful */
warn |= check_partitions(fd, dname, freesize*2, s->size*2);
else
pr_err("no size and no drives given - aborting create.\n");
return 1;
}
- if (s->level > 0 || s->level == LEVEL_MULTIPATH
- || s->level == LEVEL_FAULTY
- || st->ss->external ) {
+ if (s->level > 0 || s->level == LEVEL_MULTIPATH ||
+ s->level == LEVEL_FAULTY || st->ss->external) {
/* size is meaningful */
if (!st->ss->validate_geometry(st, s->level, s->layout,
s->raiddisks,
&s->chunk, minsize*2,
data_offset,
- NULL, NULL, 0)) {
+ NULL, NULL,
+ s->consistency_policy, 0)) {
pr_err("devices too large for RAID level %d\n", s->level);
return 1;
}
if (!s->bitmap_file &&
s->level >= 1 &&
st->ss->add_internal_bitmap &&
+ (s->consistency_policy != CONSISTENCY_POLICY_RESYNC &&
+ s->consistency_policy != CONSISTENCY_POLICY_PPL) &&
(s->write_behind || s->size > 100*1024*1024ULL)) {
if (c->verbose > 0)
pr_err("automatically enabling write-intent bitmap on large array\n");
if (s->bitmap_file && strcmp(s->bitmap_file, "none") == 0)
s->bitmap_file = NULL;
+ if (s->consistency_policy == CONSISTENCY_POLICY_PPL &&
+ !st->ss->write_init_ppl) {
+ pr_err("%s metadata does not support PPL\n", st->ss->name);
+ return 1;
+ }
+
if (!have_container && s->level > 0 && ((maxsize-s->size)*100 > maxsize)) {
if (c->runstop != 1 || c->verbose >= 0)
pr_err("largest drive (%s) exceeds size (%lluK) by more than 1%%\n",
* as missing, so that a reconstruct happens (faster than re-parity)
* FIX: Can we do this for raid6 as well?
*/
- if (st->ss->external == 0 &&
- s->assume_clean==0 && c->force == 0 && first_missing >= s->raiddisks) {
- switch ( s->level ) {
+ if (st->ss->external == 0 && s->assume_clean == 0 &&
+ c->force == 0 && first_missing >= s->raiddisks) {
+ switch (s->level) {
case 4:
case 5:
insert_point = s->raiddisks-1;
* it could be in conflict with already existing device
* e.g. container, array
*/
- if (strncmp(chosen_name, "/dev/md/", 8) == 0
- && map_by_name(&map, chosen_name+8) != NULL) {
+ if (strncmp(chosen_name, "/dev/md/", 8) == 0 &&
+ map_by_name(&map, chosen_name+8) != NULL) {
pr_err("Array name %s is in use already.\n",
chosen_name);
close(mdfd);
}
mddev = chosen_name;
- vers = md_get_version(mdfd);
- if (vers < 9000) {
- pr_err("Create requires md driver version 0.90.0 or later\n");
+ memset(&inf, 0, sizeof(inf));
+ md_get_array_info(mdfd, &inf);
+ if (inf.working_disks != 0) {
+ pr_err("another array by this name is already running.\n");
goto abort_locked;
- } else {
- mdu_array_info_t inf;
- memset(&inf, 0, sizeof(inf));
- ioctl(mdfd, GET_ARRAY_INFO, &inf);
- if (inf.working_disks != 0) {
- pr_err("another array by this name is already running.\n");
- goto abort_locked;
- }
}
/* Ok, lets try some ioctls */
* with, but it chooses to trust me instead. Sigh
*/
info.array.md_minor = 0;
- if (fstat(mdfd, &stb)==0)
+ if (fstat(mdfd, &stb) == 0)
info.array.md_minor = minor(stb.st_rdev);
info.array.not_persistent = 0;
- if ( ( (s->level == 4 || s->level == 5) &&
- (insert_point < s->raiddisks || first_missing < s->raiddisks) )
- ||
- ( s->level == 6 && (insert_point < s->raiddisks
- || second_missing < s->raiddisks))
- ||
- ( s->level <= 0 )
- ||
- s->assume_clean
- ) {
+ if (((s->level == 4 || s->level == 5) &&
+ (insert_point < s->raiddisks || first_missing < s->raiddisks)) ||
+ (s->level == 6 && (insert_point < s->raiddisks ||
+ second_missing < s->raiddisks)) ||
+ (s->level <= 0) || s->assume_clean) {
info.array.state = 1; /* clean, but one+ drive will be missing*/
info.resync_start = MaxSector;
} else {
name = strrchr(mddev, '/');
if (name) {
name++;
- if (strncmp(name, "md_", 3)==0 &&
- strlen(name) > 3 &&
- (name-mddev) == 5 /* /dev/ */)
+ if (strncmp(name, "md_", 3) == 0 &&
+ strlen(name) > 3 && (name-mddev) == 5 /* /dev/ */)
name += 3;
- else if (strncmp(name, "md", 2)==0 &&
- strlen(name) > 2 &&
- isdigit(name[2]) &&
+ else if (strncmp(name, "md", 2) == 0 &&
+ strlen(name) > 2 && isdigit(name[2]) &&
(name-mddev) == 5 /* /dev/ */)
name += 2;
}
}
- if (!st->ss->init_super(st, &info.array, s->size, name, c->homehost, uuid,
+ if (!st->ss->init_super(st, &info.array, s, name, c->homehost, uuid,
data_offset))
goto abort_locked;
total_slots = info.array.nr_disks;
st->ss->getinfo_super(st, &info, NULL);
- sysfs_init(&info, mdfd, NULL);
+ if (sysfs_init(&info, mdfd, NULL)) {
+ pr_err("unable to initialize sysfs\n");
+ goto abort_locked;
+ }
if (did_default && c->verbose >= 0) {
if (is_subarray(info.text_version)) {
* to stop another mdadm from finding and using those devices.
*/
- if (s->bitmap_file && vers < 9003) {
- major_num = BITMAP_MAJOR_HOSTENDIAN;
-#ifdef __BIG_ENDIAN
- pr_err("Warning - bitmaps created on this kernel are not portable\n"
- " between different architectured. Consider upgrading the Linux kernel.\n");
-#endif
- }
-
- if (s->bitmap_file && (strcmp(s->bitmap_file, "internal")==0 ||
- strcmp(s->bitmap_file, "clustered")==0)) {
- if ((vers%100) < 2) {
- pr_err("internal bitmaps not supported by this kernel.\n");
- goto abort_locked;
- }
+ if (s->bitmap_file && (strcmp(s->bitmap_file, "internal") == 0 ||
+ strcmp(s->bitmap_file, "clustered") == 0)) {
if (!st->ss->add_internal_bitmap) {
pr_err("internal bitmaps not supported with %s metadata\n",
st->ss->name);
goto abort_locked;
}
- if (!st->ss->add_internal_bitmap(st, &s->bitmap_chunk,
- c->delay, s->write_behind,
- bitmapsize, 1, major_num)) {
+ if (st->ss->add_internal_bitmap(st, &s->bitmap_chunk,
+ c->delay, s->write_behind,
+ bitmapsize, 1, major_num)) {
pr_err("Given bitmap chunk size not supported.\n");
goto abort_locked;
}
s->bitmap_file = NULL;
}
- sysfs_init(&info, mdfd, NULL);
+ if (sysfs_init(&info, mdfd, NULL)) {
+ pr_err("unable to initialize sysfs\n");
+ goto abort_locked;
+ }
if (st->ss->external && st->container_devnm[0]) {
/* member */
infos = xmalloc(sizeof(*infos) * total_slots);
enable_fds(total_slots);
- for (pass=1; pass <=2 ; pass++) {
+ for (pass = 1; pass <= 2; pass++) {
struct mddev_dev *moved_disk = NULL; /* the disk that was moved out of the insert point */
- for (dnum=0, dv = devlist ; dv ;
- dv=(dv->next)?(dv->next):moved_disk, dnum++) {
+ for (dnum = 0, raid_disk_num = 0, dv = devlist; dv;
+ dv = (dv->next) ? (dv->next) : moved_disk, dnum++) {
int fd;
- struct stat stb;
+ struct stat stb2;
struct mdinfo *inf = &infos[dnum];
if (dnum >= total_slots)
abort();
if (dnum == insert_point) {
+ raid_disk_num += 1;
moved_disk = dv;
continue;
}
- if (strcasecmp(dv->devname, "missing")==0)
+ if (strcasecmp(dv->devname, "missing") == 0) {
+ raid_disk_num += 1;
continue;
+ }
if (have_container)
moved_disk = NULL;
if (have_container && dnum < info.array.raid_disks - 1)
*inf = info;
inf->disk.number = dnum;
- inf->disk.raid_disk = dnum;
- if (inf->disk.raid_disk < s->raiddisks)
+ inf->disk.raid_disk = raid_disk_num++;
+
+ if (dv->disposition == 'j') {
+ inf->disk.raid_disk = MD_DISK_ROLE_JOURNAL;
+ inf->disk.state = (1<<MD_DISK_JOURNAL);
+ raid_disk_num--;
+ } else if (inf->disk.raid_disk < s->raiddisks)
inf->disk.state = (1<<MD_DISK_ACTIVE) |
(1<<MD_DISK_SYNC);
else
inf->disk.state = 0;
- if (dv->writemostly == 1)
+ if (dv->writemostly == FlagSet)
inf->disk.state |= (1<<MD_DISK_WRITEMOSTLY);
+ if (dv->failfast == FlagSet)
+ inf->disk.state |= (1<<MD_DISK_FAILFAST);
if (have_container)
fd = -1;
dv->devname);
goto abort_locked;
}
- fstat(fd, &stb);
- inf->disk.major = major(stb.st_rdev);
- inf->disk.minor = minor(stb.st_rdev);
+ fstat(fd, &stb2);
+ inf->disk.major = major(stb2.st_rdev);
+ inf->disk.minor = minor(stb2.st_rdev);
}
if (fd >= 0)
remove_partitions(fd);
if (!have_container) {
/* getinfo_super might have lost these ... */
- inf->disk.major = major(stb.st_rdev);
- inf->disk.minor = minor(stb.st_rdev);
+ inf->disk.major = major(stb2.st_rdev);
+ inf->disk.minor = minor(stb2.st_rdev);
}
break;
case 2: