]> git.ipfire.org Git - thirdparty/mdadm.git/blob - Create.c
Change the values for "max size" from -1 to 1.
[thirdparty/mdadm.git] / Create.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@suse.de>
23 */
24
25 #include "mdadm.h"
26 #include "md_u.h"
27 #include "md_p.h"
28 #include <ctype.h>
29
30 static int default_layout(struct supertype *st, int level, int verbose)
31 {
32 int layout = UnSet;
33
34 if (st && st->ss->default_geometry)
35 st->ss->default_geometry(st, &level, &layout, NULL);
36
37 if (layout == UnSet)
38 switch(level) {
39 default: /* no layout */
40 layout = 0;
41 break;
42 case 10:
43 layout = 0x102; /* near=2, far=1 */
44 if (verbose > 0)
45 pr_err("layout defaults to n2\n");
46 break;
47 case 5:
48 case 6:
49 layout = map_name(r5layout, "default");
50 if (verbose > 0)
51 pr_err("layout defaults to %s\n", map_num(r5layout, layout));
52 break;
53 case LEVEL_FAULTY:
54 layout = map_name(faultylayout, "default");
55
56 if (verbose > 0)
57 pr_err("layout defaults to %s\n", map_num(faultylayout, layout));
58 break;
59 }
60
61 return layout;
62 }
63
64
65 int Create(struct supertype *st, char *mddev,
66 int chunk, int level, int layout, unsigned long long size,
67 int raiddisks, int sparedisks,
68 char *name, int *uuid,
69 int subdevs, struct mddev_dev *devlist,
70 int assume_clean,
71 char *bitmap_file, int bitmap_chunk, int write_behind,
72 struct context *c)
73 {
74 /*
75 * Create a new raid array.
76 *
77 * First check that necessary details are available
78 * (i.e. level, raid-disks)
79 *
80 * Then check each disk to see what might be on it
81 * and report anything interesting.
82 *
83 * If anything looks odd, and runstop not set,
84 * abort.
85 *
86 * SET_ARRAY_INFO and ADD_NEW_DISK, and
87 * if runstop==run, or raiddisks disks were used,
88 * RUN_ARRAY
89 */
90 int mdfd;
91 unsigned long long minsize=0, maxsize=0;
92 char *mindisc = NULL;
93 char *maxdisc = NULL;
94 int dnum;
95 struct mddev_dev *dv;
96 int fail=0, warn=0;
97 struct stat stb;
98 int first_missing = subdevs * 2;
99 int second_missing = subdevs * 2;
100 int missing_disks = 0;
101 int insert_point = subdevs * 2; /* where to insert a missing drive */
102 int total_slots;
103 int pass;
104 int vers;
105 int rv;
106 int bitmap_fd;
107 int have_container = 0;
108 int container_fd = -1;
109 int need_mdmon = 0;
110 unsigned long long bitmapsize;
111 struct mdinfo info, *infos;
112 int did_default = 0;
113 int do_default_layout = 0;
114 int do_default_chunk = 0;
115 unsigned long safe_mode_delay = 0;
116 char chosen_name[1024];
117 struct map_ent *map = NULL;
118 unsigned long long newsize;
119
120 int major_num = BITMAP_MAJOR_HI;
121
122 memset(&info, 0, sizeof(info));
123 if (level == UnSet && st && st->ss->default_geometry)
124 st->ss->default_geometry(st, &level, NULL, NULL);
125 if (level == UnSet) {
126 pr_err("a RAID level is needed to create an array.\n");
127 return 1;
128 }
129 if (raiddisks < 4 && level == 6) {
130 pr_err("at least 4 raid-devices needed for level 6\n");
131 return 1;
132 }
133 if (raiddisks > 256 && level == 6) {
134 pr_err("no more than 256 raid-devices supported for level 6\n");
135 return 1;
136 }
137 if (raiddisks < 2 && level >= 4) {
138 pr_err("at least 2 raid-devices needed for level 4 or 5\n");
139 return 1;
140 }
141 if (level <= 0 && sparedisks) {
142 pr_err("This level does not support spare devices\n");
143 return 1;
144 }
145
146 if (subdevs == 1 && strcmp(devlist->devname, "missing") != 0) {
147 /* If given a single device, it might be a container, and we can
148 * extract a device list from there
149 */
150 mdu_array_info_t inf;
151 int fd;
152
153 memset(&inf, 0, sizeof(inf));
154 fd = open(devlist->devname, O_RDONLY);
155 if (fd >= 0 &&
156 ioctl(fd, GET_ARRAY_INFO, &inf) == 0 &&
157 inf.raid_disks == 0) {
158 /* yep, looks like a container */
159 if (st) {
160 rv = st->ss->load_container(st, fd,
161 devlist->devname);
162 if (rv == 0)
163 have_container = 1;
164 } else {
165 st = super_by_fd(fd, NULL);
166 if (st && !(rv = st->ss->
167 load_container(st, fd,
168 devlist->devname)))
169 have_container = 1;
170 else
171 st = NULL;
172 }
173 if (have_container) {
174 subdevs = raiddisks;
175 first_missing = subdevs * 2;
176 second_missing = subdevs * 2;
177 insert_point = subdevs * 2;
178 }
179 }
180 if (fd >= 0)
181 close(fd);
182 }
183 if (st && st->ss->external && sparedisks) {
184 pr_err("This metadata type does not support "
185 "spare disks at create time\n");
186 return 1;
187 }
188 if (subdevs > raiddisks+sparedisks) {
189 pr_err("You have listed more devices (%d) than are in the array(%d)!\n", subdevs, raiddisks+sparedisks);
190 return 1;
191 }
192 if (!have_container && subdevs < raiddisks+sparedisks) {
193 pr_err("You haven't given enough devices (real or missing) to create this array\n");
194 return 1;
195 }
196 if (bitmap_file && level <= 0) {
197 pr_err("bitmaps not meaningful with level %s\n",
198 map_num(pers, level)?:"given");
199 return 1;
200 }
201
202 /* now set some defaults */
203
204
205 if (layout == UnSet) {
206 do_default_layout = 1;
207 layout = default_layout(st, level, c->verbose);
208 }
209
210 if (level == 10)
211 /* check layout fits in array*/
212 if ((layout&255) * ((layout>>8)&255) > raiddisks) {
213 pr_err("that layout requires at least %d devices\n",
214 (layout&255) * ((layout>>8)&255));
215 return 1;
216 }
217
218 switch(level) {
219 case 4:
220 case 5:
221 case 10:
222 case 6:
223 case 0:
224 if (chunk == 0 || chunk == UnSet) {
225 chunk = UnSet;
226 do_default_chunk = 1;
227 /* chunk will be set later */
228 }
229 break;
230 case LEVEL_LINEAR:
231 /* a chunksize of zero 0s perfectly valid (and preferred) since 2.6.16 */
232 if (get_linux_version() < 2006016 && chunk == 0) {
233 chunk = 64;
234 if (c->verbose > 0)
235 pr_err("chunk size defaults to 64K\n");
236 }
237 break;
238 case 1:
239 case LEVEL_FAULTY:
240 case LEVEL_MULTIPATH:
241 case LEVEL_CONTAINER:
242 if (chunk) {
243 chunk = 0;
244 if (c->verbose > 0)
245 pr_err("chunk size ignored for this level\n");
246 }
247 break;
248 default:
249 pr_err("unknown level %d\n", level);
250 return 1;
251 }
252 if (size == MAX_SIZE)
253 /* use '0' to mean 'max' now... */
254 size = 0;
255 if (size && chunk && chunk != UnSet)
256 size &= ~(unsigned long long)(chunk - 1);
257 newsize = size * 2;
258 if (st && ! st->ss->validate_geometry(st, level, layout, raiddisks,
259 &chunk, size*2, NULL, &newsize, c->verbose>=0))
260 return 1;
261
262 if (chunk && chunk != UnSet) {
263 newsize &= ~(unsigned long long)(chunk*2 - 1);
264 if (do_default_chunk) {
265 /* default chunk was just set */
266 if (c->verbose > 0)
267 pr_err("chunk size "
268 "defaults to %dK\n", chunk);
269 size &= ~(unsigned long long)(chunk - 1);
270 do_default_chunk = 0;
271 }
272 }
273
274 if (size == 0) {
275 size = newsize / 2;
276 if (level == 1)
277 /* If this is ever reshaped to RAID5, we will
278 * need a chunksize. So round it off a bit
279 * now just to be safe
280 */
281 size &= ~(64ULL-1);
282
283 if (size && c->verbose > 0)
284 pr_err("setting size to %lluK\n",
285 (unsigned long long)size);
286 }
287
288 /* now look at the subdevs */
289 info.array.active_disks = 0;
290 info.array.working_disks = 0;
291 dnum = 0;
292 for (dv=devlist; dv && !have_container; dv=dv->next, dnum++) {
293 char *dname = dv->devname;
294 unsigned long long freesize;
295 int dfd;
296
297 if (strcasecmp(dname, "missing")==0) {
298 if (first_missing > dnum)
299 first_missing = dnum;
300 if (second_missing > dnum && dnum > first_missing)
301 second_missing = dnum;
302 missing_disks ++;
303 continue;
304 }
305 dfd = open(dname, O_RDONLY);
306 if (dfd < 0) {
307 pr_err("cannot open %s: %s\n",
308 dname, strerror(errno));
309 exit(2);
310 }
311 if (fstat(dfd, &stb) != 0 ||
312 (stb.st_mode & S_IFMT) != S_IFBLK) {
313 close(dfd);
314 pr_err("%s is not a block device\n",
315 dname);
316 exit(2);
317 }
318 close(dfd);
319 info.array.working_disks++;
320 if (dnum < raiddisks)
321 info.array.active_disks++;
322 if (st == NULL) {
323 struct createinfo *ci = conf_get_create_info();
324 if (ci)
325 st = ci->supertype;
326 }
327 if (st == NULL) {
328 /* Need to choose a default metadata, which is different
329 * depending on geometry of array.
330 */
331 int i;
332 char *name = "default";
333 for(i=0; !st && superlist[i]; i++) {
334 st = superlist[i]->match_metadata_desc(name);
335 if (!st)
336 continue;
337 if (do_default_layout)
338 layout = default_layout(st, level, c->verbose);
339 switch (st->ss->validate_geometry(
340 st, level, layout, raiddisks,
341 &chunk, size*2, dname, &freesize,
342 c->verbose > 0)) {
343 case -1: /* Not valid, message printed, and not
344 * worth checking any further */
345 exit(2);
346 break;
347 case 0: /* Geometry not valid */
348 free(st);
349 st = NULL;
350 chunk = do_default_chunk ? UnSet : chunk;
351 break;
352 case 1: /* All happy */
353 break;
354 }
355 }
356
357 if (!st) {
358 int dfd = open(dname, O_RDONLY|O_EXCL);
359 if (dfd < 0) {
360 pr_err("cannot open %s: %s\n",
361 dname, strerror(errno));
362 exit(2);
363 }
364 pr_err("device %s not suitable "
365 "for any style of array\n",
366 dname);
367 exit(2);
368 }
369 if (st->ss != &super0 ||
370 st->minor_version != 90)
371 did_default = 1;
372 } else {
373 if (do_default_layout)
374 layout = default_layout(st, level, 0);
375 if (!st->ss->validate_geometry(st, level, layout,
376 raiddisks,
377 &chunk, size*2, dname,
378 &freesize,
379 c->verbose >= 0)) {
380
381 pr_err("%s is not suitable for "
382 "this array.\n",
383 dname);
384 fail = 1;
385 continue;
386 }
387 }
388
389 freesize /= 2; /* convert to K */
390 if (chunk && chunk != UnSet) {
391 /* round to chunk size */
392 freesize = freesize & ~(chunk-1);
393 if (do_default_chunk) {
394 /* default chunk was just set */
395 if (c->verbose > 0)
396 pr_err("chunk size "
397 "defaults to %dK\n", chunk);
398 size &= ~(unsigned long long)(chunk - 1);
399 do_default_chunk = 0;
400 }
401 }
402
403 if (size && freesize < size) {
404 pr_err("%s is smaller than given size."
405 " %lluK < %lluK + metadata\n",
406 dname, freesize, size);
407 fail = 1;
408 continue;
409 }
410 if (maxdisc == NULL || (maxdisc && freesize > maxsize)) {
411 maxdisc = dname;
412 maxsize = freesize;
413 }
414 if (mindisc ==NULL || (mindisc && freesize < minsize)) {
415 mindisc = dname;
416 minsize = freesize;
417 }
418 if (c->runstop != 1 || c->verbose >= 0) {
419 int fd = open(dname, O_RDONLY);
420 if (fd <0 ) {
421 pr_err("Cannot open %s: %s\n",
422 dname, strerror(errno));
423 fail=1;
424 continue;
425 }
426 warn |= check_ext2(fd, dname);
427 warn |= check_reiser(fd, dname);
428 warn |= check_raid(fd, dname);
429 if (strcmp(st->ss->name, "1.x") == 0 &&
430 st->minor_version >= 1)
431 /* metadata at front */
432 warn |= check_partitions(fd, dname, 0, 0);
433 else if (level == 1 || level == LEVEL_CONTAINER
434 || (level == 0 && raiddisks == 1))
435 /* partitions could be meaningful */
436 warn |= check_partitions(fd, dname, freesize*2, size*2);
437 else
438 /* partitions cannot be meaningful */
439 warn |= check_partitions(fd, dname, 0, 0);
440 if (strcmp(st->ss->name, "1.x") == 0 &&
441 st->minor_version >= 1 &&
442 did_default &&
443 level == 1 &&
444 (warn & 1024) == 0) {
445 warn |= 1024;
446 pr_err("Note: this array has metadata at the start and\n"
447 " may not be suitable as a boot device. If you plan to\n"
448 " store '/boot' on this device please ensure that\n"
449 " your boot-loader understands md/v1.x metadata, or use\n"
450 " --metadata=0.90\n");
451 }
452 close(fd);
453 }
454 }
455 if (raiddisks + sparedisks > st->max_devs) {
456 pr_err("Too many devices:"
457 " %s metadata only supports %d\n",
458 st->ss->name, st->max_devs);
459 return 1;
460 }
461 if (have_container)
462 info.array.working_disks = raiddisks;
463 if (fail) {
464 pr_err("create aborted\n");
465 return 1;
466 }
467 if (size == 0) {
468 if (mindisc == NULL && !have_container) {
469 pr_err("no size and no drives given - aborting create.\n");
470 return 1;
471 }
472 if (level > 0 || level == LEVEL_MULTIPATH
473 || level == LEVEL_FAULTY
474 || st->ss->external ) {
475 /* size is meaningful */
476 if (!st->ss->validate_geometry(st, level, layout,
477 raiddisks,
478 &chunk, minsize*2,
479 NULL, NULL, 0)) {
480 pr_err("devices too large for RAID level %d\n", level);
481 return 1;
482 }
483 size = minsize;
484 if (level == 1)
485 /* If this is ever reshaped to RAID5, we will
486 * need a chunksize. So round it off a bit
487 * now just to be safe
488 */
489 size &= ~(64ULL-1);
490 if (c->verbose > 0)
491 pr_err("size set to %lluK\n", size);
492 }
493 }
494 if (!have_container && level > 0 && ((maxsize-size)*100 > maxsize)) {
495 if (c->runstop != 1 || c->verbose >= 0)
496 pr_err("largest drive (%s) exceeds size (%lluK) by more than 1%%\n",
497 maxdisc, size);
498 warn = 1;
499 }
500
501 if (st->ss->detail_platform && st->ss->detail_platform(0, 1) != 0) {
502 if (c->runstop != 1 || c->verbose >= 0)
503 pr_err("%s unable to enumerate platform support\n"
504 " array may not be compatible with hardware/firmware\n",
505 st->ss->name);
506 warn = 1;
507 }
508
509 if (warn) {
510 if (c->runstop!= 1) {
511 if (!ask("Continue creating array? ")) {
512 pr_err("create aborted.\n");
513 return 1;
514 }
515 } else {
516 if (c->verbose > 0)
517 pr_err("creation continuing despite oddities due to --run\n");
518 }
519 }
520
521 /* If this is raid4/5, we want to configure the last active slot
522 * as missing, so that a reconstruct happens (faster than re-parity)
523 * FIX: Can we do this for raid6 as well?
524 */
525 if (st->ss->external == 0 &&
526 assume_clean==0 && c->force == 0 && first_missing >= raiddisks) {
527 switch ( level ) {
528 case 4:
529 case 5:
530 insert_point = raiddisks-1;
531 sparedisks++;
532 info.array.active_disks--;
533 missing_disks++;
534 break;
535 default:
536 break;
537 }
538 }
539 /* For raid6, if creating with 1 missing drive, make a good drive
540 * into a spare, else the create will fail
541 */
542 if (assume_clean == 0 && c->force == 0 && first_missing < raiddisks &&
543 st->ss->external == 0 &&
544 second_missing >= raiddisks && level == 6) {
545 insert_point = raiddisks - 1;
546 if (insert_point == first_missing)
547 insert_point--;
548 sparedisks ++;
549 info.array.active_disks--;
550 missing_disks++;
551 }
552
553 if (level <= 0 && first_missing < subdevs * 2) {
554 pr_err("This level does not support missing devices\n");
555 return 1;
556 }
557
558 /* We need to create the device */
559 map_lock(&map);
560 mdfd = create_mddev(mddev, name, c->autof, LOCAL, chosen_name);
561 if (mdfd < 0) {
562 map_unlock(&map);
563 return 1;
564 }
565 /* verify if chosen_name is not in use,
566 * it could be in conflict with already existing device
567 * e.g. container, array
568 */
569 if (strncmp(chosen_name, "/dev/md/", 8) == 0
570 && map_by_name(&map, chosen_name+8) != NULL) {
571 pr_err("Array name %s is in use already.\n",
572 chosen_name);
573 close(mdfd);
574 map_unlock(&map);
575 return 1;
576 }
577 mddev = chosen_name;
578
579 vers = md_get_version(mdfd);
580 if (vers < 9000) {
581 pr_err("Create requires md driver version 0.90.0 or later\n");
582 goto abort_locked;
583 } else {
584 mdu_array_info_t inf;
585 memset(&inf, 0, sizeof(inf));
586 ioctl(mdfd, GET_ARRAY_INFO, &inf);
587 if (inf.working_disks != 0) {
588 pr_err("another array by this name"
589 " is already running.\n");
590 goto abort_locked;
591 }
592 }
593
594 /* Ok, lets try some ioctls */
595
596 info.array.level = level;
597 info.array.size = size;
598 info.array.raid_disks = raiddisks;
599 /* The kernel should *know* what md_minor we are dealing
600 * with, but it chooses to trust me instead. Sigh
601 */
602 info.array.md_minor = 0;
603 if (fstat(mdfd, &stb)==0)
604 info.array.md_minor = minor(stb.st_rdev);
605 info.array.not_persistent = 0;
606
607 if ( ( (level == 4 || level == 5) &&
608 (insert_point < raiddisks || first_missing < raiddisks) )
609 ||
610 ( level == 6 && (insert_point < raiddisks
611 || second_missing < raiddisks))
612 ||
613 ( level <= 0 )
614 ||
615 assume_clean
616 ) {
617 info.array.state = 1; /* clean, but one+ drive will be missing*/
618 info.resync_start = MaxSector;
619 } else {
620 info.array.state = 0; /* not clean, but no errors */
621 info.resync_start = 0;
622 }
623 if (level == 10) {
624 /* for raid10, the bitmap size is the capacity of the array,
625 * which is array.size * raid_disks / ncopies;
626 * .. but convert to sectors.
627 */
628 int ncopies = ((layout>>8) & 255) * (layout & 255);
629 bitmapsize = (unsigned long long)size * raiddisks / ncopies * 2;
630 /* printf("bms=%llu as=%d rd=%d nc=%d\n", bitmapsize, size, raiddisks, ncopies);*/
631 } else
632 bitmapsize = (unsigned long long)size * 2;
633
634 /* There is lots of redundancy in these disk counts,
635 * raid_disks is the most meaningful value
636 * it describes the geometry of the array
637 * it is constant
638 * nr_disks is total number of used slots.
639 * it should be raid_disks+spare_disks
640 * spare_disks is the number of extra disks present
641 * see above
642 * active_disks is the number of working disks in
643 * active slots. (With raid_disks)
644 * working_disks is the total number of working disks,
645 * including spares
646 * failed_disks is the number of disks marked failed
647 *
648 * Ideally, the kernel would keep these (except raid_disks)
649 * up-to-date as we ADD_NEW_DISK, but it doesn't (yet).
650 * So for now, we assume that all raid and spare
651 * devices will be given.
652 */
653 info.array.spare_disks=sparedisks;
654 info.array.failed_disks=missing_disks;
655 info.array.nr_disks = info.array.working_disks
656 + info.array.failed_disks;
657 info.array.layout = layout;
658 info.array.chunk_size = chunk*1024;
659
660 if (name == NULL || *name == 0) {
661 /* base name on mddev */
662 /* /dev/md0 -> 0
663 * /dev/md_d0 -> d0
664 * /dev/md/1 -> 1
665 * /dev/md/d1 -> d1
666 * /dev/md/home -> home
667 * /dev/mdhome -> home
668 */
669 /* FIXME compare this with rules in create_mddev */
670 name = strrchr(mddev, '/');
671 if (name) {
672 name++;
673 if (strncmp(name, "md_d", 4)==0 &&
674 strlen(name) > 4 &&
675 isdigit(name[4]) &&
676 (name-mddev) == 5 /* /dev/ */)
677 name += 3;
678 else if (strncmp(name, "md", 2)==0 &&
679 strlen(name) > 2 &&
680 isdigit(name[2]) &&
681 (name-mddev) == 5 /* /dev/ */)
682 name += 2;
683 }
684 }
685 if (!st->ss->init_super(st, &info.array, size, name, c->homehost, uuid))
686 goto abort_locked;
687
688 total_slots = info.array.nr_disks;
689 st->ss->getinfo_super(st, &info, NULL);
690 sysfs_init(&info, mdfd, 0);
691
692 if (did_default && c->verbose >= 0) {
693 if (is_subarray(info.text_version)) {
694 int dnum = devname2devnum(info.text_version+1);
695 char *path;
696 int mdp = get_mdp_major();
697 struct mdinfo *mdi;
698 if (dnum > 0)
699 path = map_dev(MD_MAJOR, dnum, 1);
700 else
701 path = map_dev(mdp, (-1-dnum)<< 6, 1);
702
703 mdi = sysfs_read(-1, dnum, GET_VERSION);
704
705 pr_err("Creating array inside "
706 "%s container %s\n",
707 mdi?mdi->text_version:"managed", path);
708 sysfs_free(mdi);
709 } else
710 pr_err("Defaulting to version"
711 " %s metadata\n", info.text_version);
712 }
713
714 map_update(&map, fd2devnum(mdfd), info.text_version,
715 info.uuid, chosen_name);
716 map_unlock(&map);
717
718 if (bitmap_file && vers < 9003) {
719 major_num = BITMAP_MAJOR_HOSTENDIAN;
720 #ifdef __BIG_ENDIAN
721 pr_err("Warning - bitmaps created on this kernel are not portable\n"
722 " between different architectured. Consider upgrading the Linux kernel.\n");
723 #endif
724 }
725
726 if (bitmap_file && strcmp(bitmap_file, "internal")==0) {
727 if ((vers%100) < 2) {
728 pr_err("internal bitmaps not supported by this kernel.\n");
729 goto abort;
730 }
731 if (!st->ss->add_internal_bitmap) {
732 pr_err("internal bitmaps not supported with %s metadata\n",
733 st->ss->name);
734 goto abort;
735 }
736 if (!st->ss->add_internal_bitmap(st, &bitmap_chunk,
737 c->delay, write_behind,
738 bitmapsize, 1, major_num)) {
739 pr_err("Given bitmap chunk size not supported.\n");
740 goto abort;
741 }
742 bitmap_file = NULL;
743 }
744
745
746 sysfs_init(&info, mdfd, 0);
747
748 if (st->ss->external && st->container_dev != NoMdDev) {
749 /* member */
750
751 /* When creating a member, we need to be careful
752 * to negotiate with mdmon properly.
753 * If it is already running, we cannot write to
754 * the devices and must ask it to do that part.
755 * If it isn't running, we write to the devices,
756 * and then start it.
757 * We hold an exclusive open on the container
758 * device to make sure mdmon doesn't exit after
759 * we checked that it is running.
760 *
761 * For now, fail if it is already running.
762 */
763 container_fd = open_dev_excl(st->container_dev);
764 if (container_fd < 0) {
765 pr_err("Cannot get exclusive "
766 "open on container - weird.\n");
767 goto abort;
768 }
769 if (mdmon_running(st->container_dev)) {
770 if (c->verbose)
771 pr_err("reusing mdmon "
772 "for %s.\n",
773 devnum2devname(st->container_dev));
774 st->update_tail = &st->updates;
775 } else
776 need_mdmon = 1;
777 }
778 rv = set_array_info(mdfd, st, &info);
779 if (rv) {
780 pr_err("failed to set array info for %s: %s\n",
781 mddev, strerror(errno));
782 goto abort;
783 }
784
785 if (bitmap_file) {
786 int uuid[4];
787
788 st->ss->uuid_from_super(st, uuid);
789 if (CreateBitmap(bitmap_file, c->force, (char*)uuid, bitmap_chunk,
790 c->delay, write_behind,
791 bitmapsize,
792 major_num)) {
793 goto abort;
794 }
795 bitmap_fd = open(bitmap_file, O_RDWR);
796 if (bitmap_fd < 0) {
797 pr_err("weird: %s cannot be openned\n",
798 bitmap_file);
799 goto abort;
800 }
801 if (ioctl(mdfd, SET_BITMAP_FILE, bitmap_fd) < 0) {
802 pr_err("Cannot set bitmap file for %s: %s\n",
803 mddev, strerror(errno));
804 goto abort;
805 }
806 }
807
808 infos = xmalloc(sizeof(*infos) * total_slots);
809
810 for (pass=1; pass <=2 ; pass++) {
811 struct mddev_dev *moved_disk = NULL; /* the disk that was moved out of the insert point */
812
813 for (dnum=0, dv = devlist ; dv ;
814 dv=(dv->next)?(dv->next):moved_disk, dnum++) {
815 int fd;
816 struct stat stb;
817 struct mdinfo *inf = &infos[dnum];
818
819 if (dnum >= total_slots)
820 abort();
821 if (dnum == insert_point) {
822 moved_disk = dv;
823 continue;
824 }
825 if (strcasecmp(dv->devname, "missing")==0)
826 continue;
827 if (have_container)
828 moved_disk = NULL;
829 if (have_container && dnum < info.array.raid_disks - 1)
830 /* repeatedly use the container */
831 moved_disk = dv;
832
833 switch(pass) {
834 case 1:
835 *inf = info;
836
837 inf->disk.number = dnum;
838 inf->disk.raid_disk = dnum;
839 if (inf->disk.raid_disk < raiddisks)
840 inf->disk.state = (1<<MD_DISK_ACTIVE) |
841 (1<<MD_DISK_SYNC);
842 else
843 inf->disk.state = 0;
844
845 if (dv->writemostly == 1)
846 inf->disk.state |= (1<<MD_DISK_WRITEMOSTLY);
847
848 if (have_container)
849 fd = -1;
850 else {
851 if (st->ss->external &&
852 st->container_dev != NoMdDev)
853 fd = open(dv->devname, O_RDWR);
854 else
855 fd = open(dv->devname, O_RDWR|O_EXCL);
856
857 if (fd < 0) {
858 pr_err("failed to open %s "
859 "after earlier success - aborting\n",
860 dv->devname);
861 goto abort;
862 }
863 fstat(fd, &stb);
864 inf->disk.major = major(stb.st_rdev);
865 inf->disk.minor = minor(stb.st_rdev);
866 }
867 if (fd >= 0)
868 remove_partitions(fd);
869 if (st->ss->add_to_super(st, &inf->disk,
870 fd, dv->devname)) {
871 ioctl(mdfd, STOP_ARRAY, NULL);
872 goto abort;
873 }
874 st->ss->getinfo_super(st, inf, NULL);
875 safe_mode_delay = inf->safe_mode_delay;
876
877 if (have_container && c->verbose > 0)
878 pr_err("Using %s for device %d\n",
879 map_dev(inf->disk.major,
880 inf->disk.minor,
881 0), dnum);
882
883 if (!have_container) {
884 /* getinfo_super might have lost these ... */
885 inf->disk.major = major(stb.st_rdev);
886 inf->disk.minor = minor(stb.st_rdev);
887 }
888 break;
889 case 2:
890 inf->errors = 0;
891
892 rv = add_disk(mdfd, st, &info, inf);
893
894 if (rv) {
895 pr_err("ADD_NEW_DISK for %s "
896 "failed: %s\n",
897 dv->devname, strerror(errno));
898 goto abort;
899 }
900 break;
901 }
902 if (!have_container &&
903 dv == moved_disk && dnum != insert_point) break;
904 }
905 if (pass == 1) {
906 struct mdinfo info_new;
907 struct map_ent *me = NULL;
908
909 /* check to see if the uuid has changed due to these
910 * metadata changes, and if so update the member array
911 * and container uuid. Note ->write_init_super clears
912 * the subarray cursor such that ->getinfo_super once
913 * again returns container info.
914 */
915 map_lock(&map);
916 st->ss->getinfo_super(st, &info_new, NULL);
917 if (st->ss->external && level != LEVEL_CONTAINER &&
918 !same_uuid(info_new.uuid, info.uuid, 0)) {
919 map_update(&map, fd2devnum(mdfd),
920 info_new.text_version,
921 info_new.uuid, chosen_name);
922 me = map_by_devnum(&map, st->container_dev);
923 }
924
925 if (st->ss->write_init_super(st)) {
926 st->ss->free_super(st);
927 goto abort_locked;
928 }
929
930 /* update parent container uuid */
931 if (me) {
932 char *path = xstrdup(me->path);
933
934 st->ss->getinfo_super(st, &info_new, NULL);
935 map_update(&map, st->container_dev,
936 info_new.text_version,
937 info_new.uuid, path);
938 free(path);
939 }
940 map_unlock(&map);
941
942 flush_metadata_updates(st);
943 st->ss->free_super(st);
944 }
945 }
946 free(infos);
947
948 if (level == LEVEL_CONTAINER) {
949 /* No need to start. But we should signal udev to
950 * create links */
951 sysfs_uevent(&info, "change");
952 if (c->verbose >= 0)
953 pr_err("container %s prepared.\n", mddev);
954 wait_for(chosen_name, mdfd);
955 } else if (c->runstop == 1 || subdevs >= raiddisks) {
956 if (st->ss->external) {
957 int err;
958 switch(level) {
959 case LEVEL_LINEAR:
960 case LEVEL_MULTIPATH:
961 case 0:
962 err = sysfs_set_str(&info, NULL, "array_state",
963 c->readonly
964 ? "readonly"
965 : "active");
966 need_mdmon = 0;
967 break;
968 default:
969 err = sysfs_set_str(&info, NULL, "array_state",
970 "readonly");
971 break;
972 }
973 sysfs_set_safemode(&info, safe_mode_delay);
974 if (err) {
975 pr_err("failed to"
976 " activate array.\n");
977 ioctl(mdfd, STOP_ARRAY, NULL);
978 goto abort;
979 }
980 } else if (c->readonly &&
981 sysfs_attribute_available(
982 &info, NULL, "array_state")) {
983 if (sysfs_set_str(&info, NULL,
984 "array_state", "readonly") < 0) {
985 pr_err("Failed to start array: %s\n",
986 strerror(errno));
987 ioctl(mdfd, STOP_ARRAY, NULL);
988 goto abort;
989 }
990 } else {
991 /* param is not actually used */
992 mdu_param_t param;
993 if (ioctl(mdfd, RUN_ARRAY, &param)) {
994 pr_err("RUN_ARRAY failed: %s\n",
995 strerror(errno));
996 if (info.array.chunk_size & (info.array.chunk_size-1)) {
997 cont_err("Problem may be that "
998 "chunk size is not a power of 2\n");
999 }
1000 ioctl(mdfd, STOP_ARRAY, NULL);
1001 goto abort;
1002 }
1003 }
1004 if (c->verbose >= 0)
1005 pr_err("array %s started.\n", mddev);
1006 if (st->ss->external && st->container_dev != NoMdDev) {
1007 if (need_mdmon)
1008 start_mdmon(st->container_dev);
1009
1010 ping_monitor_by_id(st->container_dev);
1011 close(container_fd);
1012 }
1013 wait_for(chosen_name, mdfd);
1014 } else {
1015 pr_err("not starting array - not enough devices.\n");
1016 }
1017 close(mdfd);
1018 return 0;
1019
1020 abort:
1021 map_lock(&map);
1022 abort_locked:
1023 map_remove(&map, fd2devnum(mdfd));
1024 map_unlock(&map);
1025
1026 if (mdfd >= 0)
1027 close(mdfd);
1028 return 1;
1029 }