]> git.ipfire.org Git - thirdparty/mdadm.git/blob - Assemble.c
Delay creation of array devices for assemble/build/create
[thirdparty/mdadm.git] / Assemble.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2001-2006 Neil Brown <neilb@suse.de>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@cse.unsw.edu.au>
23 * Paper: Neil Brown
24 * School of Computer Science and Engineering
25 * The University of New South Wales
26 * Sydney, 2052
27 * Australia
28 */
29
30 #include "mdadm.h"
31 #include <ctype.h>
32
/*
 * name_matches - decide whether an array name read from a device satisfies
 * the name that was asked for.
 *
 * found:    name taken from the device; at most 32 bytes of it are examined,
 *           so it need not be NUL-terminated if it fills all 32 bytes.
 * required: the NUL-terminated name being looked for.
 * homehost: optional host name (may be NULL).  When given, a 'found' name of
 *           the form "<homehost>:<required>" is also accepted.
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int name_matches(char *found, char *required, char *homehost)
{
	char fnd[33];

	/* Work on a bounded, always-terminated copy so that no comparison
	 * below can run past the first 32 bytes of 'found'.
	 */
	strncpy(fnd, found, 32);
	fnd[32] = 0;

	if (strcmp(fnd, required) == 0)
		return 1;

	if (homehost) {
		size_t hostlen = strlen(homehost);

		/* The prefix must really be 'homehost', not merely any
		 * string of the same length followed by ':'.
		 */
		if (hostlen < 32 &&
		    strncmp(fnd, homehost, hostlen) == 0 &&
		    fnd[hostlen] == ':' &&
		    strcmp(fnd + hostlen + 1, required) == 0)
			return 1;
	}
	return 0;
}
52
53 int Assemble(struct supertype *st, char *mddev,
54 mddev_ident_t ident,
55 mddev_dev_t devlist, char *backup_file,
56 int readonly, int runstop,
57 char *update, char *homehost,
58 int verbose, int force)
59 {
60 /*
61 * The task of Assemble is to find a collection of
62 * devices that should (according to their superblocks)
63 * form an array, and to give this collection to the MD driver.
64 * In Linux-2.4 and later, this involves submitting a
65 * SET_ARRAY_INFO ioctl with no arg - to prepare
66 * the array - and then submit a number of
67 * ADD_NEW_DISK ioctls to add disks into
68 * the array. Finally RUN_ARRAY might
69 * be submitted to start the array.
70 *
71 * Much of the work of Assemble is in finding and/or
72 * checking the disks to make sure they look right.
73 *
74 * If mddev is not set, then scan must be set and we
75 * read through the config file for dev+uuid mapping
76 * We recurse, setting mddev, for each device that
77 * - isn't running
78 * - has a valid uuid (or any uuid if !uuidset)
79 *
80 * If mddev is set, we try to determine state of md.
81 * check version - must be at least 0.90.0
82 * check kernel version. must be at least 2.4.
83 * If not, we can possibly fall back on START_ARRAY
84 * Try to GET_ARRAY_INFO.
85 * If possible, give up
86 * If not, try to STOP_ARRAY just to make sure
87 *
88 * If !uuidset and scan, look in conf-file for uuid
89 * If not found, give up
90 * If !devlist and scan and uuidset, get list of devs from conf-file
91 *
92 * For each device:
93 * Check superblock - discard if bad
94 * Check uuid (set if we don't have one) - discard if no match
95 * Check superblock similarity if we have a superblock - discard if different
96 * Record events, devicenum
97 * This should give us a list of devices for the array
98 * We should collect the most recent event number
99 *
100 * Count disks with recent enough event count
101 * While force && !enough disks
102 * Choose newest rejected disks, update event count
103 * mark clean and rewrite superblock
104 * If recent kernel:
105 * SET_ARRAY_INFO
106 * foreach device with recent events : ADD_NEW_DISK
107 * if runstop == 1 || "enough" disks and runstop==0 -> RUN_ARRAY
108 * If old kernel:
109 * Check the device numbers in superblock are right
110 * update superblock if any changes
111 * START_ARRAY
112 *
113 */
114 int mdfd = -1;
115 int clean = 0;
116 int auto_assem = (mddev == NULL);
117 int old_linux = 0;
118 int vers = 0; /* Keep gcc quite - it really is initialised */
119 struct {
120 char *devname;
121 int uptodate; /* set once we decide that this device is as
122 * recent as everything else in the array.
123 */
124 struct mdinfo i;
125 } *devices;
126 int *best = NULL; /* indexed by raid_disk */
127 unsigned int bestcnt = 0;
128 int devcnt = 0;
129 unsigned int okcnt, sparecnt;
130 unsigned int req_cnt;
131 unsigned int i;
132 int most_recent = 0;
133 int chosen_drive;
134 int change = 0;
135 int inargv = 0;
136 int bitmap_done;
137 int start_partial_ok = (runstop >= 0) &&
138 (force || devlist==NULL || auto_assem);
139 unsigned int num_devs;
140 mddev_dev_t tmpdev;
141 struct mdinfo info;
142 char *avail;
143 int nextspare = 0;
144 int uuid_for_name = 0;
145
146 memset(&info, 0, sizeof(info));
147
148 if (get_linux_version() < 2004000)
149 old_linux = 1;
150
151 if (mddev != NULL) {
152 /* We need to create the device */
153 mdfd = create_mddev(mddev, 1);
154 if (mdfd < 0)
155 return 1;
156 }
157
158 if (mdfd >= 0) {
159 vers = md_get_version(mdfd);
160 if (vers <= 0) {
161 fprintf(stderr, Name ": %s appears not to be an md device.\n", mddev);
162 close(mdfd);
163 return 1;
164 }
165 if (vers < 9000) {
166 fprintf(stderr, Name ": Assemble requires driver version 0.90.0 or later.\n"
167 " Upgrade your kernel or try --build\n");
168 close(mdfd);
169 return 1;
170 }
171
172 if (ioctl(mdfd, GET_ARRAY_INFO, &info.array)>=0) {
173 fprintf(stderr, Name ": device %s already active - cannot assemble it\n",
174 mddev);
175 close(mdfd);
176 return 1;
177 }
178 ioctl(mdfd, STOP_ARRAY, NULL); /* just incase it was started but has no content */
179 }
180 /*
181 * If any subdevs are listed, then any that don't
182 * match ident are discarded. Remainder must all match and
183 * become the array.
184 * If no subdevs, then we scan all devices in the config file, but
185 * there must be something in the identity
186 */
187
188 if (!devlist &&
189 ident->uuid_set == 0 &&
190 ident->super_minor < 0 &&
191 ident->devices == NULL) {
192 fprintf(stderr, Name ": No identity information available for %s - cannot assemble.\n",
193 mddev ? mddev : "further assembly");
194 if (mdfd >= 0)
195 close(mdfd);
196 return 1;
197 }
198
199 /* if the configuration specifies a container then we use that to
200 * determine the devices and retrieve the array configuration
201 */
202 #ifndef MDASSEMBLE
203 if (ident->container && ident->member) {
204 int cfd = open(ident->container, O_RDWR);
205 struct mdinfo *mdi;
206 struct supertype container;
207
208 if (verbose>0)
209 fprintf(stderr, Name ": looking to assemble member array %s"
210 " inside container %s\n", ident->member, ident->container);
211 if (cfd < 0) {
212 if (verbose>0)
213 fprintf(stderr, Name ": unable to open container %s: %s\n",
214 ident->container, strerror(errno));
215 return 1;
216 }
217
218 mdi = sysfs_read(cfd, fd2devnum(cfd), GET_VERSION);
219 if (!mdi) {
220 close(cfd);
221 if (verbose>0)
222 fprintf(stderr, Name ": unable to read container %s\n",
223 ident->container);
224 return 1;
225 }
226 container.ss = find_metadata_methods(mdi->text_version);
227 sysfs_free(mdi);
228 if (!container.ss) {
229 close(cfd);
230 fprintf(stderr, Name ": %s uses unknown metadata: %s\n",
231 ident->container, mdi->text_version);
232 return 1;
233 }
234 if (container.ss->load_super(&container, cfd, ident->container)) {
235 fprintf(stderr, Name ": Cannot load metadata for container %s\n",
236 ident->container);
237 return 1;
238 }
239
240 return Incremental_container(&container, ident->container,
241 verbose, runstop, ident->autof);
242 }
243 #endif
244 if (devlist == NULL)
245 devlist = conf_get_devs();
246 else if (mddev)
247 inargv = 1;
248
249 try_again:
250
251 tmpdev = devlist; num_devs = 0;
252 while (tmpdev) {
253 if (tmpdev->used)
254 tmpdev->used = 2;
255 else
256 num_devs++;
257 tmpdev = tmpdev->next;
258 }
259 devices = malloc(num_devs * sizeof(*devices));
260
261 if (!st && ident->st) st = ident->st;
262
263 if (verbose>0)
264 fprintf(stderr, Name ": looking for devices for %s\n",
265 mddev ? mddev : "further assembly");
266
267 /* first walk the list of devices to find a consistent set
268 * that match the criterea, if that is possible.
269 * We flag the one we like with 'used'.
270 */
271 for (tmpdev = devlist;
272 tmpdev;
273 tmpdev = tmpdev->next) {
274 char *devname = tmpdev->devname;
275 int dfd;
276 struct stat stb;
277 struct supertype *tst = dup_super(st);
278
279 if (tmpdev->used > 1) continue;
280
281 if (ident->devices &&
282 !match_oneof(ident->devices, devname)) {
283 if ((inargv && verbose>=0) || verbose > 0)
284 fprintf(stderr, Name ": %s is not one of %s\n", devname, ident->devices);
285 continue;
286 }
287
288 dfd = dev_open(devname, O_RDONLY|O_EXCL);
289 if (dfd < 0) {
290 if ((inargv && verbose >= 0) || verbose > 0)
291 fprintf(stderr, Name ": cannot open device %s: %s\n",
292 devname, strerror(errno));
293 tmpdev->used = 2;
294 } else if (fstat(dfd, &stb)< 0) {
295 /* Impossible! */
296 fprintf(stderr, Name ": fstat failed for %s: %s\n",
297 devname, strerror(errno));
298 tmpdev->used = 2;
299 } else if ((stb.st_mode & S_IFMT) != S_IFBLK) {
300 fprintf(stderr, Name ": %s is not a block device.\n",
301 devname);
302 tmpdev->used = 2;
303 } else if (!tst && (tst = guess_super(dfd)) == NULL) {
304 if ((inargv && verbose >= 0) || verbose > 0)
305 fprintf(stderr, Name ": no recogniseable superblock on %s\n",
306 devname);
307 tmpdev->used = 2;
308 } else if (tst->ss->load_super(tst,dfd, NULL)) {
309 if ((inargv && verbose >= 0) || verbose > 0)
310 fprintf( stderr, Name ": no RAID superblock on %s\n",
311 devname);
312 } else {
313 tst->ss->getinfo_super(tst, &info);
314 }
315 if (dfd >= 0) close(dfd);
316
317 if (ident->uuid_set && (!update || strcmp(update, "uuid")!= 0) &&
318 (!tst || !tst->sb ||
319 same_uuid(info.uuid, ident->uuid, tst->ss->swapuuid)==0)) {
320 if ((inargv && verbose >= 0) || verbose > 0)
321 fprintf(stderr, Name ": %s has wrong uuid.\n",
322 devname);
323 goto loop;
324 }
325 if (ident->name[0] && (!update || strcmp(update, "name")!= 0) &&
326 (!tst || !tst->sb ||
327 name_matches(info.name, ident->name, homehost)==0)) {
328 if ((inargv && verbose >= 0) || verbose > 0)
329 fprintf(stderr, Name ": %s has wrong name.\n",
330 devname);
331 goto loop;
332 }
333 if (ident->super_minor != UnSet &&
334 (!tst || !tst->sb ||
335 ident->super_minor != info.array.md_minor)) {
336 if ((inargv && verbose >= 0) || verbose > 0)
337 fprintf(stderr, Name ": %s has wrong super-minor.\n",
338 devname);
339 goto loop;
340 }
341 if (ident->level != UnSet &&
342 (!tst || !tst->sb ||
343 ident->level != info.array.level)) {
344 if ((inargv && verbose >= 0) || verbose > 0)
345 fprintf(stderr, Name ": %s has wrong raid level.\n",
346 devname);
347 goto loop;
348 }
349 if (ident->raid_disks != UnSet &&
350 (!tst || !tst->sb ||
351 ident->raid_disks!= info.array.raid_disks)) {
352 if ((inargv && verbose >= 0) || verbose > 0)
353 fprintf(stderr, Name ": %s requires wrong number of drives.\n",
354 devname);
355 goto loop;
356 }
357 if (mdfd < 0) {
358 if (tst == NULL || tst->sb == NULL)
359 continue;
360 switch(tst->ss->match_home(tst, homehost))
361 {
362 case 1: /* happy with match. */
363 break;
364 case -1: /* cannot match */
365 uuid_for_name = 1;
366 break;
367 case 0: /* Doesn't match */
368 if (update)
369 /* We are changing the name*/
370 break;
371 if ((inargv && verbose >= 0) || verbose > 0)
372 fprintf(stderr, Name ": %s is not built for "
373 "host %s - using UUID for "
374 "device name.\n",
375 devname, homehost);
376
377 /* Auto-assemble, and this is not a usable host */
378 /* if update != NULL, we are updating the host
379 * name... */
380 uuid_for_name = 1;
381 break;
382 }
383 }
384 /* If we are this far, then we are nearly commited to this device.
385 * If the super_block doesn't exist, or doesn't match others,
386 * then we probably cannot continue
387 * However if one of the arrays is for the homehost, and
388 * the other isn't that can disambiguate.
389 */
390
391 if (!tst || !tst->sb) {
392 fprintf(stderr, Name ": %s has no superblock - assembly aborted\n",
393 devname);
394 if (st)
395 st->ss->free_super(st);
396 if (mdfd >= 0)
397 close(mdfd);
398 return 1;
399 }
400
401 if (st == NULL)
402 st = dup_super(tst);
403 if (st->minor_version == -1)
404 st->minor_version = tst->minor_version;
405 if (st->ss != tst->ss ||
406 st->minor_version != tst->minor_version ||
407 st->ss->compare_super(st, tst) != 0) {
408 /* Some mismatch. If exactly one array matches this host,
409 * we can resolve on that one.
410 * Or, if we are auto assembling, we just ignore the second
411 * for now.
412 */
413 if (mdfd < 0)
414 goto loop;
415 if (homehost) {
416 int first = st->ss->match_home(st, homehost);
417 int last = tst->ss->match_home(tst, homehost);
418 if (first != last &&
419 (first == 1 || last == 1)) {
420 /* We can do something */
421 if (first) {/* just ignore this one */
422 if ((inargv && verbose >= 0) || verbose > 0)
423 fprintf(stderr, Name ": %s misses out due to wrong homehost\n",
424 devname);
425 goto loop;
426 } else { /* reject all those sofar */
427 mddev_dev_t td;
428 if ((inargv && verbose >= 0) || verbose > 0)
429 fprintf(stderr, Name ": %s overrides previous devices due to good homehost\n",
430 devname);
431 for (td=devlist; td != tmpdev; td=td->next)
432 if (td->used == 1)
433 td->used = 0;
434 tmpdev->used = 1;
435 goto loop;
436 }
437 }
438 }
439 fprintf(stderr, Name ": superblock on %s doesn't match others - assembly aborted\n",
440 devname);
441 tst->ss->free_super(tst);
442 st->ss->free_super(st);
443 if (mdfd >= 0)
444 close(mdfd);
445 return 1;
446 }
447
448 tmpdev->used = 1;
449
450 loop:
451 if (tst)
452 tst->ss->free_super(tst);
453 }
454
455 if (mdfd < 0) {
456 /* So... it is up to me to open the device.
457 * We create a name '/dev/md/XXX' based on the info in the
458 * superblock, and call create_mddev on that
459 */
460 mdu_array_info_t inf;
461 char *c;
462 char nbuf[64];
463 int rc;
464
465 if (!st || !st->sb) {
466 return 2;
467 }
468 st->ss->getinfo_super(st, &info);
469 if (uuid_for_name)
470 c = fname_from_uuid(st, &info, nbuf, '-');
471 else {
472 c = strchr(info.name, ':');
473 if (c) c++; else c= info.name;
474 }
475 if (isdigit(*c) && ((ident->autof & 7)==4 || (ident->autof&7)==6))
476 /* /dev/md/d0 style for partitionable */
477 rc = asprintf(&mddev, "/dev/md/d%s", c);
478 else
479 rc = asprintf(&mddev, "/dev/md/%s", c);
480 if (rc < 0)
481 mdfd = -1;
482 else
483 mdfd = create_mddev(mddev, ident->autof);
484 if (mdfd < 0) {
485 st->ss->free_super(st);
486 free(devices);
487 goto try_again;
488 }
489 vers = md_get_version(mdfd);
490 if (ioctl(mdfd, GET_ARRAY_INFO, &inf)==0) {
491 for (tmpdev = devlist ;
492 tmpdev && tmpdev->used != 1;
493 tmpdev = tmpdev->next)
494 ;
495 fprintf(stderr, Name ": %s already active, cannot restart it!\n", mddev);
496 if (tmpdev)
497 fprintf(stderr, Name ": %s needed for %s...\n",
498 mddev, tmpdev->devname);
499 close(mdfd);
500 mdfd = -3;
501 st->ss->free_super(st);
502 free(devices);
503 goto try_again;
504 }
505 }
506
507 /* Ok, no bad inconsistancy, we can try updating etc */
508 bitmap_done = 0;
509 for (tmpdev = devlist; tmpdev; tmpdev=tmpdev->next) if (tmpdev->used == 1) {
510 char *devname = tmpdev->devname;
511 struct stat stb;
512 /* looks like a good enough match to update the super block if needed */
513 #ifndef MDASSEMBLE
514 if (update) {
515 int dfd;
516 /* prepare useful information in info structures */
517 struct stat stb2;
518 struct supertype *tst;
519 fstat(mdfd, &stb2);
520
521 if (strcmp(update, "uuid")==0 &&
522 !ident->uuid_set) {
523 int rfd;
524 if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
525 read(rfd, ident->uuid, 16) != 16) {
526 *(__u32*)(ident->uuid) = random();
527 *(__u32*)(ident->uuid+1) = random();
528 *(__u32*)(ident->uuid+2) = random();
529 *(__u32*)(ident->uuid+3) = random();
530 }
531 if (rfd >= 0) close(rfd);
532 }
533 dfd = dev_open(devname, O_RDWR|O_EXCL);
534
535 remove_partitions(dfd);
536
537 tst = dup_super(st);
538 tst->ss->load_super(tst, dfd, NULL);
539 tst->ss->getinfo_super(tst, &info);
540
541 memcpy(info.uuid, ident->uuid, 16);
542 strcpy(info.name, ident->name);
543 info.array.md_minor = minor(stb2.st_rdev);
544
545 tst->ss->update_super(tst, &info, update,
546 devname, verbose,
547 ident->uuid_set, homehost);
548 if (strcmp(update, "uuid")==0 &&
549 !ident->uuid_set) {
550 ident->uuid_set = 1;
551 memcpy(ident->uuid, info.uuid, 16);
552 }
553 if (dfd < 0)
554 fprintf(stderr, Name ": Cannot open %s for superblock update\n",
555 devname);
556 else if (tst->ss->store_super(tst, dfd))
557 fprintf(stderr, Name ": Could not re-write superblock on %s.\n",
558 devname);
559 if (dfd >= 0)
560 close(dfd);
561
562 if (strcmp(update, "uuid")==0 &&
563 ident->bitmap_fd >= 0 && !bitmap_done) {
564 if (bitmap_update_uuid(ident->bitmap_fd,
565 info.uuid,
566 tst->ss->swapuuid) != 0)
567 fprintf(stderr, Name ": Could not update uuid on external bitmap.\n");
568 else
569 bitmap_done = 1;
570 }
571 tst->ss->free_super(tst);
572 } else
573 #endif
574 {
575 struct supertype *tst = dup_super(st);
576 int dfd;
577 dfd = dev_open(devname, O_RDWR|O_EXCL);
578
579 remove_partitions(dfd);
580
581 tst->ss->load_super(tst, dfd, NULL);
582 tst->ss->getinfo_super(tst, &info);
583 tst->ss->free_super(tst);
584 close(dfd);
585 }
586
587 stat(devname, &stb);
588
589 if (verbose > 0)
590 fprintf(stderr, Name ": %s is identified as a member of %s, slot %d.\n",
591 devname, mddev, info.disk.raid_disk);
592 devices[devcnt].devname = devname;
593 devices[devcnt].uptodate = 0;
594 devices[devcnt].i = info;
595 devices[devcnt].i.disk.major = major(stb.st_rdev);
596 devices[devcnt].i.disk.minor = minor(stb.st_rdev);
597 if (most_recent < devcnt) {
598 if (devices[devcnt].i.events
599 > devices[most_recent].i.events)
600 most_recent = devcnt;
601 }
602 if (info.array.level == -4)
603 /* with multipath, the raid_disk from the superblock is meaningless */
604 i = devcnt;
605 else
606 i = devices[devcnt].i.disk.raid_disk;
607 if (i+1 == 0) {
608 if (nextspare < info.array.raid_disks)
609 nextspare = info.array.raid_disks;
610 i = nextspare++;
611 } else {
612 if (i >= info.array.raid_disks &&
613 i >= nextspare)
614 nextspare = i+1;
615 }
616 if (i < 10000) {
617 if (i >= bestcnt) {
618 unsigned int newbestcnt = i+10;
619 int *newbest = malloc(sizeof(int)*newbestcnt);
620 unsigned int c;
621 for (c=0; c < newbestcnt; c++)
622 if (c < bestcnt)
623 newbest[c] = best[c];
624 else
625 newbest[c] = -1;
626 if (best)free(best);
627 best = newbest;
628 bestcnt = newbestcnt;
629 }
630 if (best[i] >=0 &&
631 devices[best[i]].i.events
632 == devices[devcnt].i.events
633 && (devices[best[i]].i.disk.minor
634 != devices[devcnt].i.disk.minor)
635 && st->ss == &super0
636 && info.array.level != LEVEL_MULTIPATH) {
637 /* two different devices with identical superblock.
638 * Could be a mis-detection caused by overlapping
639 * partitions. fail-safe.
640 */
641 fprintf(stderr, Name ": WARNING %s and %s appear"
642 " to have very similar superblocks.\n"
643 " If they are really different, "
644 "please --zero the superblock on one\n"
645 " If they are the same or overlap,"
646 " please remove one from %s.\n",
647 devices[best[i]].devname, devname,
648 inargv ? "the list" :
649 "the\n DEVICE list in mdadm.conf"
650 );
651 close(mdfd);
652 return 1;
653 }
654 if (best[i] == -1
655 || (devices[best[i]].i.events
656 < devices[devcnt].i.events))
657 best[i] = devcnt;
658 }
659 devcnt++;
660 }
661
662 if (devcnt == 0) {
663 fprintf(stderr, Name ": no devices found for %s\n",
664 mddev);
665 if (st)
666 st->ss->free_super(st);
667 close(mdfd);
668 return 1;
669 }
670
671 if (update && strcmp(update, "byteorder")==0)
672 st->minor_version = 90;
673
674 st->ss->getinfo_super(st, &info);
675 clean = info.array.state & 1;
676
677 /* now we have some devices that might be suitable.
678 * I wonder how many
679 */
680 avail = malloc(info.array.raid_disks);
681 memset(avail, 0, info.array.raid_disks);
682 okcnt = 0;
683 sparecnt=0;
684 for (i=0; i< bestcnt ;i++) {
685 int j = best[i];
686 int event_margin = 1; /* always allow a difference of '1'
687 * like the kernel does
688 */
689 if (j < 0) continue;
690 /* note: we ignore error flags in multipath arrays
691 * as they don't make sense
692 */
693 if (info.array.level != -4)
694 if (!(devices[j].i.disk.state & (1<<MD_DISK_SYNC))) {
695 if (!(devices[j].i.disk.state
696 & (1<<MD_DISK_FAULTY)))
697 sparecnt++;
698 continue;
699 }
700 if (devices[j].i.events+event_margin >=
701 devices[most_recent].i.events) {
702 devices[j].uptodate = 1;
703 if (i < info.array.raid_disks) {
704 okcnt++;
705 avail[i]=1;
706 } else
707 sparecnt++;
708 }
709 }
710 while (force && !enough(info.array.level, info.array.raid_disks,
711 info.array.layout, 1,
712 avail, okcnt)) {
713 /* Choose the newest best drive which is
714 * not up-to-date, update the superblock
715 * and add it.
716 */
717 int fd;
718 struct supertype *tst;
719 long long current_events;
720 chosen_drive = -1;
721 for (i=0; i<info.array.raid_disks && i < bestcnt; i++) {
722 int j = best[i];
723 if (j>=0 &&
724 !devices[j].uptodate &&
725 devices[j].i.events > 0 &&
726 (chosen_drive < 0 ||
727 devices[j].i.events
728 > devices[chosen_drive].i.events))
729 chosen_drive = j;
730 }
731 if (chosen_drive < 0)
732 break;
733 current_events = devices[chosen_drive].i.events;
734 add_another:
735 if (verbose >= 0)
736 fprintf(stderr, Name ": forcing event count in %s(%d) from %d upto %d\n",
737 devices[chosen_drive].devname,
738 devices[chosen_drive].i.disk.raid_disk,
739 (int)(devices[chosen_drive].i.events),
740 (int)(devices[most_recent].i.events));
741 fd = dev_open(devices[chosen_drive].devname, O_RDWR|O_EXCL);
742 if (fd < 0) {
743 fprintf(stderr, Name ": Couldn't open %s for write - not updating\n",
744 devices[chosen_drive].devname);
745 devices[chosen_drive].i.events = 0;
746 continue;
747 }
748 tst = dup_super(st);
749 if (tst->ss->load_super(tst,fd, NULL)) {
750 close(fd);
751 fprintf(stderr, Name ": RAID superblock disappeared from %s - not updating.\n",
752 devices[chosen_drive].devname);
753 devices[chosen_drive].i.events = 0;
754 continue;
755 }
756 info.events = devices[most_recent].i.events;
757 tst->ss->update_super(tst, &info, "force-one",
758 devices[chosen_drive].devname, verbose,
759 0, NULL);
760
761 if (tst->ss->store_super(tst, fd)) {
762 close(fd);
763 fprintf(stderr, Name ": Could not re-write superblock on %s\n",
764 devices[chosen_drive].devname);
765 devices[chosen_drive].i.events = 0;
766 tst->ss->free_super(tst);
767 continue;
768 }
769 close(fd);
770 devices[chosen_drive].i.events = devices[most_recent].i.events;
771 devices[chosen_drive].uptodate = 1;
772 avail[chosen_drive] = 1;
773 okcnt++;
774 tst->ss->free_super(tst);
775
776 /* If there are any other drives of the same vintage,
777 * add them in as well. We can't lose and we might gain
778 */
779 for (i=0; i<info.array.raid_disks && i < bestcnt ; i++) {
780 int j = best[i];
781 if (j >= 0 &&
782 !devices[j].uptodate &&
783 devices[j].i.events > 0 &&
784 devices[j].i.events == current_events) {
785 chosen_drive = j;
786 goto add_another;
787 }
788 }
789 }
790
791 /* Now we want to look at the superblock which the kernel will base things on
792 * and compare the devices that we think are working with the devices that the
793 * superblock thinks are working.
794 * If there are differences and --force is given, then update this chosen
795 * superblock.
796 */
797 chosen_drive = -1;
798 st->ss->free_super(st);
799 for (i=0; chosen_drive < 0 && i<bestcnt; i++) {
800 int j = best[i];
801 int fd;
802
803 if (j<0)
804 continue;
805 if (!devices[j].uptodate)
806 continue;
807 chosen_drive = j;
808 if ((fd=dev_open(devices[j].devname, O_RDONLY|O_EXCL))< 0) {
809 fprintf(stderr, Name ": Cannot open %s: %s\n",
810 devices[j].devname, strerror(errno));
811 close(mdfd);
812 return 1;
813 }
814 if (st->ss->load_super(st,fd, NULL)) {
815 close(fd);
816 fprintf(stderr, Name ": RAID superblock has disappeared from %s\n",
817 devices[j].devname);
818 close(mdfd);
819 return 1;
820 }
821 close(fd);
822 }
823 if (st->sb == NULL) {
824 fprintf(stderr, Name ": No suitable drives found for %s\n", mddev);
825 close(mdfd);
826 return 1;
827 }
828 st->ss->getinfo_super(st, &info);
829 #ifndef MDASSEMBLE
830 sysfs_init(&info, mdfd, 0);
831 #endif
832 for (i=0; i<bestcnt; i++) {
833 int j = best[i];
834 unsigned int desired_state;
835
836 if (i < info.array.raid_disks)
837 desired_state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC);
838 else
839 desired_state = 0;
840
841 if (j<0)
842 continue;
843 if (!devices[j].uptodate)
844 continue;
845
846 devices[j].i.disk.state = desired_state;
847
848 if (st->ss->update_super(st, &devices[j].i, "assemble", NULL,
849 verbose, 0, NULL)) {
850 if (force) {
851 if (verbose >= 0)
852 fprintf(stderr, Name ": "
853 "clearing FAULTY flag for device %d in %s for %s\n",
854 j, mddev, devices[j].devname);
855 change = 1;
856 } else {
857 if (verbose >= -1)
858 fprintf(stderr, Name ": "
859 "device %d in %s has wrong state in superblock, but %s seems ok\n",
860 i, mddev, devices[j].devname);
861 }
862 }
863 #if 0
864 if (!(super.disks[i].i.disk.state & (1 << MD_DISK_FAULTY))) {
865 fprintf(stderr, Name ": devices %d of %s is not marked FAULTY in superblock, but cannot be found\n",
866 i, mddev);
867 }
868 #endif
869 }
870 if (force && !clean &&
871 !enough(info.array.level, info.array.raid_disks,
872 info.array.layout, clean,
873 avail, okcnt)) {
874 change += st->ss->update_super(st, &info, "force-array",
875 devices[chosen_drive].devname, verbose,
876 0, NULL);
877 clean = 1;
878 }
879
880 if (change) {
881 int fd;
882 fd = dev_open(devices[chosen_drive].devname, O_RDWR|O_EXCL);
883 if (fd < 0) {
884 fprintf(stderr, Name ": Could not open %s for write - cannot Assemble array.\n",
885 devices[chosen_drive].devname);
886 close(mdfd);
887 return 1;
888 }
889 if (st->ss->store_super(st, fd)) {
890 close(fd);
891 fprintf(stderr, Name ": Could not re-write superblock on %s\n",
892 devices[chosen_drive].devname);
893 close(mdfd);
894 return 1;
895 }
896 close(fd);
897 }
898
899 /* If we are in the middle of a reshape we may need to restore saved data
900 * that was moved aside due to the reshape overwriting live data
901 * The code of doing this lives in Grow.c
902 */
903 #ifndef MDASSEMBLE
904 if (info.reshape_active) {
905 int err = 0;
906 int *fdlist = malloc(sizeof(int)* bestcnt);
907 for (i=0; i<bestcnt; i++) {
908 int j = best[i];
909 if (j >= 0) {
910 fdlist[i] = dev_open(devices[j].devname, O_RDWR|O_EXCL);
911 if (fdlist[i] < 0) {
912 fprintf(stderr, Name ": Could not open %s for write - cannot Assemble array.\n",
913 devices[j].devname);
914 err = 1;
915 break;
916 }
917 } else
918 fdlist[i] = -1;
919 }
920 if (!err)
921 err = Grow_restart(st, &info, fdlist, bestcnt, backup_file);
922 while (i>0) {
923 i--;
924 if (fdlist[i]>=0) close(fdlist[i]);
925 }
926 if (err) {
927 fprintf(stderr, Name ": Failed to restore critical section for reshape, sorry.\n");
928 close(mdfd);
929 return err;
930 }
931 }
932 #endif
933 /* count number of in-sync devices according to the superblock.
934 * We must have this number to start the array without -s or -R
935 */
936 req_cnt = info.array.working_disks;
937
938 /* Almost ready to actually *do* something */
939 if (!old_linux) {
940 int rv;
941
942 rv = set_array_info(mdfd, st, &info);
943 if (rv) {
944 fprintf(stderr, Name ": failed to set array info for %s: %s\n",
945 mddev, strerror(errno));
946 close(mdfd);
947 return 1;
948 }
949 if (ident->bitmap_fd >= 0) {
950 if (ioctl(mdfd, SET_BITMAP_FILE, ident->bitmap_fd) != 0) {
951 fprintf(stderr, Name ": SET_BITMAP_FILE failed.\n");
952 close(mdfd);
953 return 1;
954 }
955 } else if (ident->bitmap_file) {
956 /* From config file */
957 int bmfd = open(ident->bitmap_file, O_RDWR);
958 if (bmfd < 0) {
959 fprintf(stderr, Name ": Could not open bitmap file %s\n",
960 ident->bitmap_file);
961 close(mdfd);
962 return 1;
963 }
964 if (ioctl(mdfd, SET_BITMAP_FILE, bmfd) != 0) {
965 fprintf(stderr, Name ": Failed to set bitmapfile for %s\n", mddev);
966 close(bmfd);
967 close(mdfd);
968 return 1;
969 }
970 close(bmfd);
971 }
972
973 /* First, add the raid disks, but add the chosen one last */
974 for (i=0; i<= bestcnt; i++) {
975 int j;
976 if (i < bestcnt) {
977 j = best[i];
978 if (j == chosen_drive)
979 continue;
980 } else
981 j = chosen_drive;
982
983 if (j >= 0 /* && devices[j].uptodate */) {
984 rv = add_disk(mdfd, st, &info, &devices[j].i);
985
986 if (rv) {
987 fprintf(stderr, Name ": failed to add "
988 "%s to %s: %s\n",
989 devices[j].devname,
990 mddev,
991 strerror(errno));
992 if (i < info.array.raid_disks
993 || i == bestcnt)
994 okcnt--;
995 else
996 sparecnt--;
997 } else if (verbose > 0)
998 fprintf(stderr, Name ": added %s "
999 "to %s as %d\n",
1000 devices[j].devname, mddev,
1001 devices[j].i.disk.raid_disk);
1002 } else if (verbose > 0 && i < info.array.raid_disks)
1003 fprintf(stderr, Name ": no uptodate device for "
1004 "slot %d of %s\n",
1005 i, mddev);
1006 }
1007
1008 if (info.array.level == LEVEL_CONTAINER) {
1009 if (verbose >= 0) {
1010 fprintf(stderr, Name ": Container %s has been "
1011 "assembled with %d drive%s",
1012 mddev, okcnt+sparecnt, okcnt+sparecnt==1?"":"s");
1013 if (okcnt < info.array.raid_disks)
1014 fprintf(stderr, " (out of %d)",
1015 info.array.raid_disks);
1016 fprintf(stderr, "\n");
1017 }
1018 close(mdfd);
1019 return 0;
1020 }
1021
1022 if (runstop == 1 ||
1023 (runstop <= 0 &&
1024 ( enough(info.array.level, info.array.raid_disks,
1025 info.array.layout, clean, avail, okcnt) &&
1026 (okcnt >= req_cnt || start_partial_ok)
1027 ))) {
1028 if (ioctl(mdfd, RUN_ARRAY, NULL)==0) {
1029 if (verbose >= 0) {
1030 fprintf(stderr, Name ": %s has been started with %d drive%s",
1031 mddev, okcnt, okcnt==1?"":"s");
1032 if (okcnt < info.array.raid_disks)
1033 fprintf(stderr, " (out of %d)", info.array.raid_disks);
1034 if (sparecnt)
1035 fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s");
1036 fprintf(stderr, ".\n");
1037 }
1038 close(mdfd);
1039 if (auto_assem) {
1040 int usecs = 1;
1041 /* There is a nasty race with 'mdadm --monitor'.
1042 * If it opens this device before we close it,
1043 * it gets an incomplete open on which IO
1044 * doesn't work and the capacity is
1045 * wrong.
1046 * If we reopen (to check for layered devices)
1047 * before --monitor closes, we loose.
1048 *
1049 * So: wait upto 1 second for there to be
1050 * a non-zero capacity.
1051 */
1052 while (usecs < 1000) {
1053 mdfd = open(mddev, O_RDONLY);
1054 if (mdfd >= 0) {
1055 unsigned long long size;
1056 if (get_dev_size(mdfd, NULL, &size) &&
1057 size > 0)
1058 break;
1059 close(mdfd);
1060 }
1061 usleep(usecs);
1062 usecs <<= 1;
1063 }
1064 }
1065 return 0;
1066 }
1067 fprintf(stderr, Name ": failed to RUN_ARRAY %s: %s\n",
1068 mddev, strerror(errno));
1069
1070 if (!enough(info.array.level, info.array.raid_disks,
1071 info.array.layout, 1, avail, okcnt))
1072 fprintf(stderr, Name ": Not enough devices to "
1073 "start the array.\n");
1074 else if (!enough(info.array.level,
1075 info.array.raid_disks,
1076 info.array.layout, clean,
1077 avail, okcnt))
1078 fprintf(stderr, Name ": Not enough devices to "
1079 "start the array while not clean "
1080 "- consider --force.\n");
1081
1082 if (auto_assem)
1083 ioctl(mdfd, STOP_ARRAY, NULL);
1084 close(mdfd);
1085 return 1;
1086 }
1087 if (runstop == -1) {
1088 fprintf(stderr, Name ": %s assembled from %d drive%s",
1089 mddev, okcnt, okcnt==1?"":"s");
1090 if (okcnt != info.array.raid_disks)
1091 fprintf(stderr, " (out of %d)", info.array.raid_disks);
1092 fprintf(stderr, ", but not started.\n");
1093 close(mdfd);
1094 return 0;
1095 }
1096 if (verbose >= -1) {
1097 fprintf(stderr, Name ": %s assembled from %d drive%s", mddev, okcnt, okcnt==1?"":"s");
1098 if (sparecnt)
1099 fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s");
1100 if (!enough(info.array.level, info.array.raid_disks,
1101 info.array.layout, 1, avail, okcnt))
1102 fprintf(stderr, " - not enough to start the array.\n");
1103 else if (!enough(info.array.level,
1104 info.array.raid_disks,
1105 info.array.layout, clean,
1106 avail, okcnt))
1107 fprintf(stderr, " - not enough to start the "
1108 "array while not clean - consider "
1109 "--force.\n");
1110 else {
1111 if (req_cnt == info.array.raid_disks)
1112 fprintf(stderr, " - need all %d to start it", req_cnt);
1113 else
1114 fprintf(stderr, " - need %d of %d to start", req_cnt, info.array.raid_disks);
1115 fprintf(stderr, " (use --run to insist).\n");
1116 }
1117 }
1118 if (auto_assem)
1119 ioctl(mdfd, STOP_ARRAY, NULL);
1120 return 1;
1121 } else {
1122 /* The "chosen_drive" is a good choice, and if necessary, the superblock has
1123 * been updated to point to the current locations of devices.
1124 * so we can just start the array
1125 */
1126 unsigned long dev;
1127 dev = makedev(devices[chosen_drive].i.disk.major,
1128 devices[chosen_drive].i.disk.minor);
1129 if (ioctl(mdfd, START_ARRAY, dev)) {
1130 fprintf(stderr, Name ": Cannot start array: %s\n",
1131 strerror(errno));
1132 }
1133
1134 }
1135 close(mdfd);
1136 return 0;
1137 }