X-Git-Url: http://git.ipfire.org/?p=thirdparty%2Fmdadm.git;a=blobdiff_plain;f=Create.c;h=19793fa405af2417b6c09df9f7e294429fc880ea;hp=c7d35979a443c22f625069869d68ab9cddba1266;hb=111d01fcc76d2e7d0b05f78fae67e89cdf6856ad;hpb=52826846282e9e224e05dde6d2e4cb38d1fefec7 diff --git a/Create.c b/Create.c index c7d35979..19793fa4 100644 --- a/Create.c +++ b/Create.c @@ -1,7 +1,7 @@ /* - * mdctl - manage Linux "md" devices aka RAID arrays. + * mdadm - manage Linux "md" devices aka RAID arrays. * - * Copyright (C) 2001 Neil Brown + * Copyright (C) 2001-2006 Neil Brown * * * This program is free software; you can redistribute it and/or modify @@ -27,14 +27,17 @@ * Australia */ -#include "mdctl.h" +#include "mdadm.h" #include "md_u.h" #include "md_p.h" +#include -int Create(char *mddev, int mdfd, - int chunk, int level, int layout, int size, int raiddisks, int sparedisks, - int subdevs, char *subdev[], - int runstop, int verbose, int force) +int Create(struct supertype *st, char *mddev, int mdfd, + int chunk, int level, int layout, unsigned long long size, int raiddisks, int sparedisks, + char *name, char *homehost, int *uuid, + int subdevs, mddev_dev_t devlist, + int runstop, int verbose, int force, int assume_clean, + char *bitmap_file, int bitmap_chunk, int write_behind, int delay) { /* * Create a new raid array. @@ -49,151 +52,275 @@ int Create(char *mddev, int mdfd, * abort. * * SET_ARRAY_INFO and ADD_NEW_DISK, and - * if runstop==run, or raiddisks diskswere used, + * if runstop==run, or raiddisks disks were used, * RUN_ARRAY */ - int minsize, maxsize; - int maxdisc= -1, mindisc = -1; - int i; + unsigned long long minsize=0, maxsize=0; + char *mindisc = NULL; + char *maxdisc = NULL; + int dnum; + mddev_dev_t dv; int fail=0, warn=0; struct stat stb; - int first_missing = MD_SB_DISKS*2; + int first_missing = subdevs * 2; + int second_missing = subdevs * 2; int missing_disks = 0; - int insert_point = MD_SB_DISKS*2; /* where to insert a missing drive */ + int insert_point = subdevs * 2; /* where to insert a missing drive */ + int pass; + int vers; + int rv; + int bitmap_fd; + unsigned long long bitmapsize; + struct mdinfo info; - mdu_array_info_t array; - + int major_num = BITMAP_MAJOR_HI; - if (md_get_version(mdfd) < 9000) { - fprintf(stderr, Name ": Create requires md driver verison 0.90.0 or later\n"); + memset(&info, 0, sizeof(info)); + + vers = md_get_version(mdfd); + if (vers < 9000) { + fprintf(stderr, Name ": Create requires md driver version 0.90.0 or later\n"); return 1; + } else { + mdu_array_info_t inf; + memset(&inf, 0, sizeof(inf)); + ioctl(mdfd, GET_ARRAY_INFO, &inf); + if (inf.working_disks != 0) { + fprintf(stderr, Name ": another array by this name" + " is already running.\n"); + return 1; + } } - if (level == -10) { + if (level == UnSet) { fprintf(stderr, Name ": a RAID level is needed to create an array.\n"); return 1; } - if (raiddisks < 1) { + if (raiddisks < 4 && level == 6) { + fprintf(stderr, + Name ": at least 4 raid-devices needed for level 6\n"); + return 1; + } + if (raiddisks > 256 && level == 6) { fprintf(stderr, - Name ": a number of --raid-disks must be given to create an array\n"); + Name ": no more than 256 raid-devices supported for level 6\n"); return 1; } if (raiddisks < 2 && level >= 4) { fprintf(stderr, - Name ": atleast 2 raid-disks needed for level 4 or 5\n"); + Name ": at least 2 raid-devices needed for level 4 or 5\n"); return 1; } - if (raiddisks+sparedisks > MD_SB_DISKS) { + if (level <= 0 && sparedisks) { fprintf(stderr, - Name ": too many discs requested: %d+%d > %d\n", - raiddisks, sparedisks, MD_SB_DISKS); + Name ": This level does not support spare devices\n"); return 1; } if (subdevs > raiddisks+sparedisks) { - fprintf(stderr, Name ": You have listed more disks (%d) than are in the array(%d)!\n", subdevs, raiddisks+sparedisks); + fprintf(stderr, Name ": You have listed more devices (%d) than are in the array(%d)!\n", subdevs, raiddisks+sparedisks); return 1; } - if (subdevs < raiddisks) { + if (subdevs < raiddisks+sparedisks) { fprintf(stderr, Name ": You haven't given enough devices (real or missing) to create this array\n"); return 1; } + if (bitmap_file && level <= 0) { + fprintf(stderr, Name ": bitmaps not meaningful with level %s\n", + map_num(pers, level)?:"given"); + return 1; + } /* now set some defaults */ - if (layout == -1) + if (layout == UnSet) switch(level) { default: /* no layout */ layout = 0; break; + case 10: + layout = 0x102; /* near=2, far=1 */ + if (verbose > 0) + fprintf(stderr, + Name ": layout defaults to n1\n"); + break; case 5: + case 6: layout = map_name(r5layout, "default"); - if (verbose) + if (verbose > 0) fprintf(stderr, Name ": layout defaults to %s\n", map_num(r5layout, layout)); break; + case LEVEL_FAULTY: + layout = map_name(faultylayout, "default"); + + if (verbose > 0) + fprintf(stderr, + Name ": layout defaults to %s\n", map_num(faultylayout, layout)); + break; + } + + if (level == 10) + /* check layout fits in array*/ + if ((layout&255) * ((layout>>8)&255) > raiddisks) { + fprintf(stderr, Name ": that layout requires at least %d devices\n", + (layout&255) * ((layout>>8)&255)); + return 1; } - if (chunk == 0) { - chunk = 64; - if (verbose) - fprintf(stderr, Name ": chunk size defaults to 64K\n"); + switch(level) { + case 4: + case 5: + case 10: + case 6: + case 0: + case LEVEL_LINEAR: /* linear */ + if (chunk == 0) { + chunk = 64; + if (verbose > 0) + fprintf(stderr, Name ": chunk size defaults to 64K\n"); + } + break; + case 1: + case LEVEL_FAULTY: + case LEVEL_MULTIPATH: + case LEVEL_CONTAINER: + if (chunk) { + chunk = 0; + if (verbose > 0) + fprintf(stderr, Name ": chunk size ignored for this level\n"); + } + break; + default: + fprintf(stderr, Name ": unknown level %d\n", level); + return 1; } + if (st && ! st->ss->validate_geometry(st, level, layout, raiddisks, + chunk, size, NULL, NULL)) + return 1; + /* now look at the subdevs */ - array.active_disks = 0; - array.working_disks = 0; - for (i=0; i i) - first_missing = i; + info.array.active_disks = 0; + info.array.working_disks = 0; + dnum = 0; + for (dv=devlist; dv; dv=dv->next, dnum++) { + char *dname = dv->devname; + unsigned long long freesize; + if (strcasecmp(dname, "missing")==0) { + if (first_missing > dnum) + first_missing = dnum; + if (second_missing > dnum && dnum > first_missing) + second_missing = dnum; missing_disks ++; continue; } - array.working_disks++; - if (i < raiddisks) - array.active_disks++; - fd = open(dname, O_RDONLY, 0); - if (fd <0 ) { - fprintf(stderr, Name ": Cannot open %s: %s\n", - dname, strerror(errno)); - fail=1; - continue; + info.array.working_disks++; + if (dnum < raiddisks) + info.array.active_disks++; + if (st == NULL) { + struct createinfo *ci = conf_get_create_info(); + if (ci) + st = ci->supertype; } - if (ioctl(fd, BLKGETSIZE, &dsize)) { - fprintf(stderr, Name ": Cannot get size of %s: %s\n", - dname, strerror(errno)); - fail = 1; - close(fd); - continue; + if (st == NULL) { + /* Need to choose a default metadata, which is different + * depending on geometry of array. + */ + int i; + char *name = "default"; + for(i=0; !st && superlist[i]; i++) { + st = superlist[i]->match_metadata_desc(name); + if (st && !st->ss->validate_geometry + (st, level, layout, raiddisks, + chunk, size, dname, &freesize)) + st = NULL; + } + + if (!st) { + fprintf(stderr, Name ": device %s not suitable " + "for any style of array\n", + dname); + exit(2); + } + if (st->ss->major != 0 || + st->minor_version != 90) + fprintf(stderr, Name ": Defaulting to version" + " %d.%d metadata\n", + st->ss->major, + st->minor_version); + } else { + if (!st->ss->validate_geometry(st, level, layout, + raiddisks, + chunk, size, dname, + &freesize)) { + + fprintf(stderr, + Name ": %s is not suitable for " + "this array.\n", + dname); + fail = 1; + continue; + } } - if (dsize < MD_RESERVED_SECTORS*2) { - fprintf(stderr, Name ": %s is too small: %dK\n", - dname, dsize/2); - fail = 1; - close(fd); - continue; + + freesize /= 2; /* convert to K */ + if (chunk) { + /* round to chunk size */ + freesize = freesize & ~(chunk-1); } - freesize = MD_NEW_SIZE_SECTORS(dsize); - freesize /= 2; if (size && freesize < size) { fprintf(stderr, Name ": %s is smaller that given size." - " %dK < %dK + superblock\n", dname, freesize, size); + " %lluK < %lluK + metadata\n", + dname, freesize, size); fail = 1; - close(fd); continue; } - if (maxdisc< 0 || (maxdisc>=0 && freesize > maxsize)) { - maxdisc = i; + if (maxdisc == NULL || (maxdisc && freesize > maxsize)) { + maxdisc = dname; maxsize = freesize; } - if (mindisc < 0 || (mindisc >=0 && freesize < minsize)) { - mindisc = i; + if (mindisc ==NULL || (mindisc && freesize < minsize)) { + mindisc = dname; minsize = freesize; } - warn |= check_ext2(fd, dname); - warn |= check_reiser(fd, dname); - warn |= check_raid(fd, dname); - close(fd); + if (runstop != 1 || verbose >= 0) { + int fd = open(dname, O_RDONLY, 0); + if (fd <0 ) { + fprintf(stderr, Name ": Cannot open %s: %s\n", + dname, strerror(errno)); + fail=1; + continue; + } + warn |= check_ext2(fd, dname); + warn |= check_reiser(fd, dname); + warn |= check_raid(fd, dname); + close(fd); + } } if (fail) { fprintf(stderr, Name ": create aborted\n"); return 1; } if (size == 0) { - if (mindisc == -1) { + if (mindisc == NULL) { fprintf(stderr, Name ": no size and no drives given - aborting create.\n"); return 1; } - size = minsize; - if (verbose && level>0) - fprintf(stderr, Name ": size set to %dK\n", size); + if (level > 0 || level == LEVEL_MULTIPATH || level == LEVEL_FAULTY) { + /* size is meaningful */ + if (minsize > 0x100000000ULL && st->ss->major == 0) { + fprintf(stderr, Name ": devices too large for RAID level %d\n", level); + return 1; + } + size = minsize; + if (verbose > 0) + fprintf(stderr, Name ": size set to %lluK\n", size); + } } - if ((maxsize-size)*100 > maxsize) { - fprintf(stderr, Name ": largest drive (%s) exceed size (%dK) by more than 1%\n", - subdev[maxdisc], size); + if (level > 0 && ((maxsize-size)*100 > maxsize)) { + if (runstop != 1 || verbose >= 0) + fprintf(stderr, Name ": largest drive (%s) exceed size (%lluK) by more than 1%%\n", + maxdisc, size); warn = 1; } @@ -204,37 +331,82 @@ int Create(char *mddev, int mdfd, return 1; } } else { - if (verbose) + if (verbose > 0) fprintf(stderr, Name ": creation continuing despite oddities due to --run\n"); } } - /* If this is raid5, we want to configure the last active slot + /* If this is raid4/5, we want to configure the last active slot * as missing, so that a reconstruct happens (faster than re-parity) + * FIX: Can we do this for raid6 as well? + */ + if (assume_clean==0 && force == 0 && first_missing >= raiddisks) { + switch ( level ) { + case 4: + case 5: + insert_point = raiddisks-1; + sparedisks++; + info.array.active_disks--; + missing_disks++; + break; + default: + break; + } + } + /* For raid6, if creating with 1 missing drive, make a good drive + * into a spare, else the create will fail */ - if (force == 0 && level == 5 && first_missing >= raiddisks) { - insert_point = raiddisks-1; - sparedisks++; - array.active_disks--; + if (assume_clean == 0 && force == 0 && first_missing < raiddisks && + second_missing >= raiddisks && level == 6) { + insert_point = raiddisks - 1; + if (insert_point == first_missing) + insert_point--; + sparedisks ++; + info.array.active_disks--; missing_disks++; } - + + if (level <= 0 && first_missing != subdevs * 2) { + fprintf(stderr, + Name ": This level does not support missing devices\n"); + return 1; + } + /* Ok, lets try some ioctls */ - array.level = level; - array.size = size; - array.raid_disks = raiddisks; + info.array.level = level; + info.array.size = size; + info.array.raid_disks = raiddisks; /* The kernel should *know* what md_minor we are dealing * with, but it chooses to trust me instead. Sigh */ - array.md_minor = 0; + info.array.md_minor = 0; if (fstat(mdfd, &stb)==0) - array.md_minor = MINOR(stb.st_rdev); - array.not_persistent = 0; - if (level == 5 && (insert_point < raiddisks || first_missing < raiddisks)) - array.state = 1; /* clean, but one drive will be missing */ + info.array.md_minor = minor(stb.st_rdev); + info.array.not_persistent = 0; + + if ( ( (level == 4 || level == 5) && + (insert_point < raiddisks || first_missing < raiddisks) ) + || + ( level == 6 && (insert_point < raiddisks + || second_missing < raiddisks)) + || + assume_clean + ) + info.array.state = 1; /* clean, but one+ drive will be missing*/ else - array.state = 0; /* not clean, but no errors */ + info.array.state = 0; /* not clean, but no errors */ + + if (level == 10) { + /* for raid10, the bitmap size is the capacity of the array, + * which is array.size * raid_disks / ncopies; + * .. but convert to sectors. + */ + int ncopies = ((layout>>8) & 255) * (layout & 255); + bitmapsize = (unsigned long long)size * raiddisks / ncopies * 2; +/* printf("bms=%llu as=%d rd=%d nc=%d\n", bitmapsize, size, raiddisks, ncopies);*/ + } else + bitmapsize = (unsigned long long)size * 2; /* There is lots of redundancy in these disk counts, * raid_disks is the most meaningful value @@ -255,74 +427,181 @@ int Create(char *mddev, int mdfd, * So for now, we assume that all raid and spare * devices will be given. */ - array.spare_disks=sparedisks; - array.failed_disks=missing_disks; - array.nr_disks = array.working_disks + array.failed_disks; - array.layout = layout; - array.chunk_size = chunk*1024; + info.array.spare_disks=sparedisks; + info.array.failed_disks=missing_disks; + info.array.nr_disks = info.array.working_disks + + info.array.failed_disks; + info.array.layout = layout; + info.array.chunk_size = chunk*1024; + info.array.major_version = st->ss->major; + + if (name == NULL || *name == 0) { + /* base name on mddev */ + /* /dev/md0 -> 0 + * /dev/md_d0 -> d0 + * /dev/md/1 -> 1 + * /dev/md/d1 -> d1 + * /dev/md/home -> home + * /dev/mdhome -> home + */ + name = strrchr(mddev, '/'); + if (name) { + name++; + if (strncmp(name, "md_d", 4)==0 && + strlen(name) > 4 && + isdigit(name[4]) && + (name-mddev) == 5 /* /dev/ */) + name += 3; + else if (strncmp(name, "md", 2)==0 && + strlen(name) > 2 && + isdigit(name[2]) && + (name-mddev) == 5 /* /dev/ */) + name += 2; + } + } + if (!st->ss->init_super(st, &info.array, size, name, homehost, uuid)) + return 1; - if (ioctl(mdfd, SET_ARRAY_INFO, &array)) { + if (bitmap_file && vers < 9003) { + major_num = BITMAP_MAJOR_HOSTENDIAN; +#ifdef __BIG_ENDIAN + fprintf(stderr, Name ": Warning - bitmaps created on this kernel are not portable\n" + " between different architectured. Consider upgrading the Linux kernel.\n"); +#endif + } + + if (bitmap_file && strcmp(bitmap_file, "internal")==0) { + if ((vers%100) < 2) { + fprintf(stderr, Name ": internal bitmaps not supported by this kernel.\n"); + return 1; + } + if (!st->ss->add_internal_bitmap(st, &bitmap_chunk, + delay, write_behind, + bitmapsize, 1, major_num)) { + fprintf(stderr, Name ": Given bitmap chunk size not supported.\n"); + return 1; + } + bitmap_file = NULL; + } + + + + if ((vers % 100) >= 1) { /* can use different versions */ + mdu_array_info_t inf; + memset(&inf, 0, sizeof(inf)); + inf.major_version = st->ss->major; + inf.minor_version = st->minor_version; + rv = ioctl(mdfd, SET_ARRAY_INFO, &inf); + } else + rv = ioctl(mdfd, SET_ARRAY_INFO, NULL); + if (rv) { fprintf(stderr, Name ": SET_ARRAY_INFO failed for %s: %s\n", mddev, strerror(errno)); return 1; } - - for (i=0; i= insert_point) - disk.number++; - disk.raid_disk = disk.number; - if (disk.raid_disk < raiddisks) - disk.state = 6; /* active and in sync */ - else - disk.state = 0; - if (strcasecmp(subdev[i], "missing")==0) { - disk.major = 0; - disk.minor = 0; - disk.state = 1; /* faulty */ - } else { - fd = open(subdev[i], O_RDONLY, 0); - if (fd < 0) { - fprintf(stderr, Name ": failed to open %s after earlier success - aborting\n", - subdev[i]); - return 1; - } - fstat(fd, &stb); - disk.major = MAJOR(stb.st_rdev); - disk.minor = MINOR(stb.st_rdev); - close(fd); + + if (bitmap_file) { + int uuid[4]; + + st->ss->uuid_from_super(st, uuid); + if (CreateBitmap(bitmap_file, force, (char*)uuid, bitmap_chunk, + delay, write_behind, + bitmapsize, + major_num)) { + return 1; } - if (ioctl(mdfd, ADD_NEW_DISK, &disk)) { - fprintf(stderr, Name ": ADD_NEW_DISK for %s failed: %s\n", - subdev[i], strerror(errno)); + bitmap_fd = open(bitmap_file, O_RDWR); + if (bitmap_fd < 0) { + fprintf(stderr, Name ": weird: %s cannot be openned\n", + bitmap_file); + return 1; + } + if (ioctl(mdfd, SET_BITMAP_FILE, bitmap_fd) < 0) { + fprintf(stderr, Name ": Cannot set bitmap file for %s: %s\n", + mddev, strerror(errno)); return 1; } } - if (insert_point < MD_SB_DISKS) { - mdu_disk_info_t disk; - disk.number = insert_point; - disk.raid_disk = disk.number; - disk.state = 1; /* faulty */ - disk.major = disk.minor = 0; - ioctl(mdfd,ADD_NEW_DISK, &disk); + + + for (pass=1; pass <=2 ; pass++) { + mddev_dev_t moved_disk = NULL; /* the disk that was moved out of the insert point */ + + for (dnum=0, dv = devlist ; dv ; + dv=(dv->next)?(dv->next):moved_disk, dnum++) { + int fd; + struct stat stb; + + info.disk.number = dnum; + if (dnum == insert_point) { + moved_disk = dv; + } + info.disk.raid_disk = info.disk.number; + if (info.disk.raid_disk < raiddisks) + info.disk.state = (1<writemostly) + info.disk.state |= (1<devname, "missing")==0) + continue; + + fd = open(dv->devname, O_RDWR|O_EXCL, 0); + if (fd < 0) { + fprintf(stderr, Name ": failed to open %s " + "after earlier success - aborting\n", + dv->devname); + return 1; + } + fstat(fd, &stb); + info.disk.major = major(stb.st_rdev); + info.disk.minor = minor(stb.st_rdev); + + switch(pass){ + case 1: + remove_partitions(fd); + st->ss->add_to_super(st, &info.disk, + fd, dv->devname); + break; + case 2: + close(fd); + + if (ioctl(mdfd, ADD_NEW_DISK, &info.disk)) { + fprintf(stderr, Name ": ADD_NEW_DISK for %s failed: %s\n", + dv->devname, strerror(errno)); + st->ss->free_super(st); + return 1; + } + + break; + } + if (dv == moved_disk && dnum != insert_point) break; + } + if (pass == 1) + st->ss->write_init_super(st); } + st->ss->free_super(st); /* param is not actually used */ - if (runstop == 1 || subdevs >= raiddisks) { + if (level == LEVEL_CONTAINER) + /* No need to start */ + ; + else if (runstop == 1 || subdevs >= raiddisks) { mdu_param_t param; if (ioctl(mdfd, RUN_ARRAY, ¶m)) { fprintf(stderr, Name ": RUN_ARRAY failed: %s\n", strerror(errno)); + Manage_runstop(mddev, mdfd, -1, 0); return 1; } - fprintf(stderr, Name ": array %s started.\n", mddev); + if (verbose >= 0) + fprintf(stderr, Name ": array %s started.\n", mddev); } else { - fprintf(stderr, Name ": not starting array - not enough discs.\n"); + fprintf(stderr, Name ": not starting array - not enough devices.\n"); } return 0; }