]> git.ipfire.org Git - thirdparty/mdadm.git/blob - Assemble.c
Assemble: block attempts to reassemble container members
[thirdparty/mdadm.git] / Assemble.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2001-2006 Neil Brown <neilb@suse.de>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@cse.unsw.edu.au>
23 * Paper: Neil Brown
24 * School of Computer Science and Engineering
25 * The University of New South Wales
26 * Sydney, 2052
27 * Australia
28 */
29
30 #include "mdadm.h"
31 #include <ctype.h>
32
static int name_matches(const char *found, const char *required,
			const char *homehost)
{
	/* Decide whether the array name 'found' satisfies 'required'.
	 *
	 * A match is either an exact match, or - when 'homehost' is
	 * given - a name of the form "<homehost>:<required>".
	 *
	 * 'found' is treated as a field of at most 32 bytes which need
	 * not be NUL terminated, so every comparison is made against a
	 * bounded, terminated local copy rather than against 'found'
	 * itself.  Only the first 32 bytes of 'found' are significant.
	 *
	 * Returns 1 on a match, 0 otherwise.
	 */
	char fnd[33];

	/* strncpy zero-pads, so all of fnd[0..31] is initialised */
	strncpy(fnd, found, 32);
	fnd[32] = 0;

	if (strcmp(fnd, required) == 0)
		return 1;

	if (homehost) {
		size_t l = strlen(homehost);

		/* l < 32 keeps fnd[l] and fnd+l+1 inside the buffer */
		if (l < 32 && fnd[l] == ':' &&
		    strcmp(fnd + l + 1, required) == 0)
			return 1;
	}
	return 0;
}
52
53 /*static */ int is_member_busy(char *metadata_version)
54 {
55 /* check if the given member array is active */
56 struct mdstat_ent *mdstat = mdstat_read(1, 0);
57 struct mdstat_ent *ent;
58 int busy = 0;
59
60 for (ent = mdstat; ent; ent = ent->next) {
61 if (ent->metadata_version == NULL)
62 continue;
63 if (strncmp(ent->metadata_version, "external:", 9) != 0)
64 continue;
65 if (!is_subarray(&ent->metadata_version[9]))
66 continue;
67 /* Skip first char - it can be '/' or '-' */
68 if (strcmp(&ent->metadata_version[10], metadata_version+1) == 0) {
69 busy = 1;
70 break;
71 }
72 }
73 free_mdstat(mdstat);
74
75 return busy;
76 }
77
78 int Assemble(struct supertype *st, char *mddev,
79 mddev_ident_t ident,
80 mddev_dev_t devlist, char *backup_file,
81 int readonly, int runstop,
82 char *update, char *homehost,
83 int verbose, int force)
84 {
85 /*
86 * The task of Assemble is to find a collection of
87 * devices that should (according to their superblocks)
88 * form an array, and to give this collection to the MD driver.
89 * In Linux-2.4 and later, this involves submitting a
90 * SET_ARRAY_INFO ioctl with no arg - to prepare
91 * the array - and then submit a number of
92 * ADD_NEW_DISK ioctls to add disks into
93 * the array. Finally RUN_ARRAY might
94 * be submitted to start the array.
95 *
96 * Much of the work of Assemble is in finding and/or
97 * checking the disks to make sure they look right.
98 *
99 * If mddev is not set, then scan must be set and we
100 * read through the config file for dev+uuid mapping
101 * We recurse, setting mddev, for each device that
102 * - isn't running
103 * - has a valid uuid (or any uuid if !uuidset)
104 *
105 * If mddev is set, we try to determine state of md.
106 * check version - must be at least 0.90.0
107 * check kernel version. must be at least 2.4.
108 * If not, we can possibly fall back on START_ARRAY
109 * Try to GET_ARRAY_INFO.
110 * If possible, give up
111 * If not, try to STOP_ARRAY just to make sure
112 *
113 * If !uuidset and scan, look in conf-file for uuid
114 * If not found, give up
115 * If !devlist and scan and uuidset, get list of devs from conf-file
116 *
117 * For each device:
118 * Check superblock - discard if bad
119 * Check uuid (set if we don't have one) - discard if no match
120 * Check superblock similarity if we have a superblock - discard if different
121 * Record events, devicenum
122 * This should give us a list of devices for the array
123 * We should collect the most recent event number
124 *
125 * Count disks with recent enough event count
126 * While force && !enough disks
127 * Choose newest rejected disks, update event count
128 * mark clean and rewrite superblock
129 * If recent kernel:
130 * SET_ARRAY_INFO
131 * foreach device with recent events : ADD_NEW_DISK
132 * if runstop == 1 || "enough" disks and runstop==0 -> RUN_ARRAY
133 * If old kernel:
134 * Check the device numbers in superblock are right
135 * update superblock if any changes
136 * START_ARRAY
137 *
138 */
139 int mdfd;
140 int clean;
141 int auto_assem = (mddev == NULL);
142 int old_linux = 0;
143 int vers = vers; /* Keep gcc quite - it really is initialised */
144 struct {
145 char *devname;
146 int uptodate; /* set once we decide that this device is as
147 * recent as everything else in the array.
148 */
149 struct mdinfo i;
150 } *devices;
151 int *best = NULL; /* indexed by raid_disk */
152 unsigned int bestcnt = 0;
153 int devcnt = 0;
154 unsigned int okcnt, sparecnt;
155 unsigned int req_cnt;
156 unsigned int i;
157 int most_recent = 0;
158 int chosen_drive;
159 int change = 0;
160 int inargv = 0;
161 int bitmap_done;
162 int start_partial_ok = (runstop >= 0) &&
163 (force || devlist==NULL || auto_assem);
164 unsigned int num_devs;
165 mddev_dev_t tmpdev;
166 struct mdinfo info;
167 struct mdinfo *content = NULL;
168 mdu_array_info_t tmp_inf;
169 char *avail;
170 int nextspare = 0;
171 char *name = NULL;
172 int trustworthy;
173 char chosen_name[1024];
174
175 if (get_linux_version() < 2004000)
176 old_linux = 1;
177
178 /*
179 * If any subdevs are listed, then any that don't
180 * match ident are discarded. Remainder must all match and
181 * become the array.
182 * If no subdevs, then we scan all devices in the config file, but
183 * there must be something in the identity
184 */
185
186 if (!devlist &&
187 ident->uuid_set == 0 &&
188 ident->super_minor < 0 &&
189 ident->devices == NULL) {
190 fprintf(stderr, Name ": No identity information available for %s - cannot assemble.\n",
191 mddev ? mddev : "further assembly");
192 return 1;
193 }
194
195 if (devlist == NULL)
196 devlist = conf_get_devs();
197 else if (mddev)
198 inargv = 1;
199
200 try_again:
201 /* We come back here when doing auto-assembly and attempting some
202 * set of devices failed. Those are now marked as ->used==2 and
203 * we ignore them and try again
204 */
205
206 tmpdev = devlist; num_devs = 0;
207 while (tmpdev) {
208 if (tmpdev->used)
209 tmpdev->used = 2;
210 else
211 num_devs++;
212 tmpdev = tmpdev->next;
213 }
214 devices = malloc(num_devs * sizeof(*devices));
215
216 if (!st && ident->st) st = ident->st;
217
218 if (verbose>0)
219 fprintf(stderr, Name ": looking for devices for %s\n",
220 mddev ? mddev : "further assembly");
221
222 /* first walk the list of devices to find a consistent set
223 * that match the criterea, if that is possible.
224 * We flag the ones we like with 'used'.
225 */
226 for (tmpdev = devlist;
227 tmpdev;
228 tmpdev = tmpdev->next) {
229 char *devname = tmpdev->devname;
230 int dfd;
231 struct stat stb;
232 struct supertype *tst = dup_super(st);
233
234 if (tmpdev->used > 1) continue;
235
236 if (ident->devices &&
237 !match_oneof(ident->devices, devname)) {
238 if ((inargv && verbose>=0) || verbose > 0)
239 fprintf(stderr, Name ": %s is not one of %s\n", devname, ident->devices);
240 continue;
241 }
242
243 dfd = dev_open(devname, O_RDONLY|O_EXCL);
244 if (dfd < 0) {
245 if ((inargv && verbose >= 0) || verbose > 0)
246 fprintf(stderr, Name ": cannot open device %s: %s\n",
247 devname, strerror(errno));
248 tmpdev->used = 2;
249 } else if (fstat(dfd, &stb)< 0) {
250 /* Impossible! */
251 fprintf(stderr, Name ": fstat failed for %s: %s\n",
252 devname, strerror(errno));
253 tmpdev->used = 2;
254 } else if ((stb.st_mode & S_IFMT) != S_IFBLK) {
255 fprintf(stderr, Name ": %s is not a block device.\n",
256 devname);
257 tmpdev->used = 2;
258 } else if (!tst && (tst = guess_super(dfd)) == NULL) {
259 if ((inargv && verbose >= 0) || verbose > 0)
260 fprintf(stderr, Name ": no recogniseable superblock on %s\n",
261 devname);
262 tmpdev->used = 2;
263 } else if (tst->ss->load_super(tst,dfd, NULL)) {
264 if ((inargv && verbose >= 0) || verbose > 0)
265 fprintf( stderr, Name ": no RAID superblock on %s\n",
266 devname);
267 } else {
268 content = &info;
269 memset(content, 0, sizeof(*content));
270 tst->ss->getinfo_super(tst, content);
271 }
272 if (dfd >= 0) close(dfd);
273
274 if (ident->uuid_set && (!update || strcmp(update, "uuid")!= 0) &&
275 (!tst || !tst->sb ||
276 same_uuid(content->uuid, ident->uuid, tst->ss->swapuuid)==0)) {
277 if ((inargv && verbose >= 0) || verbose > 0)
278 fprintf(stderr, Name ": %s has wrong uuid.\n",
279 devname);
280 goto loop;
281 }
282 if (ident->name[0] && (!update || strcmp(update, "name")!= 0) &&
283 (!tst || !tst->sb ||
284 name_matches(content->name, ident->name, homehost)==0)) {
285 if ((inargv && verbose >= 0) || verbose > 0)
286 fprintf(stderr, Name ": %s has wrong name.\n",
287 devname);
288 goto loop;
289 }
290 if (ident->super_minor != UnSet &&
291 (!tst || !tst->sb ||
292 ident->super_minor != content->array.md_minor)) {
293 if ((inargv && verbose >= 0) || verbose > 0)
294 fprintf(stderr, Name ": %s has wrong super-minor.\n",
295 devname);
296 goto loop;
297 }
298 if (ident->level != UnSet &&
299 (!tst || !tst->sb ||
300 ident->level != content->array.level)) {
301 if ((inargv && verbose >= 0) || verbose > 0)
302 fprintf(stderr, Name ": %s has wrong raid level.\n",
303 devname);
304 goto loop;
305 }
306 if (ident->raid_disks != UnSet &&
307 (!tst || !tst->sb ||
308 ident->raid_disks!= content->array.raid_disks)) {
309 if ((inargv && verbose >= 0) || verbose > 0)
310 fprintf(stderr, Name ": %s requires wrong number of drives.\n",
311 devname);
312 goto loop;
313 }
314 if (auto_assem) {
315 if (tst == NULL || tst->sb == NULL)
316 continue;
317 }
318 /* If we are this far, then we are nearly commited to this device.
319 * If the super_block doesn't exist, or doesn't match others,
320 * then we probably cannot continue
321 * However if one of the arrays is for the homehost, and
322 * the other isn't that can disambiguate.
323 */
324
325 if (!tst || !tst->sb) {
326 fprintf(stderr, Name ": %s has no superblock - assembly aborted\n",
327 devname);
328 if (st)
329 st->ss->free_super(st);
330 return 1;
331 }
332
333 if (st == NULL)
334 st = dup_super(tst);
335 if (st->minor_version == -1)
336 st->minor_version = tst->minor_version;
337 if (st->ss != tst->ss ||
338 st->minor_version != tst->minor_version ||
339 st->ss->compare_super(st, tst) != 0) {
340 /* Some mismatch. If exactly one array matches this host,
341 * we can resolve on that one.
342 * Or, if we are auto assembling, we just ignore the second
343 * for now.
344 */
345 if (auto_assem)
346 goto loop;
347 if (homehost) {
348 int first = st->ss->match_home(st, homehost);
349 int last = tst->ss->match_home(tst, homehost);
350 if (first != last &&
351 (first == 1 || last == 1)) {
352 /* We can do something */
353 if (first) {/* just ignore this one */
354 if ((inargv && verbose >= 0) || verbose > 0)
355 fprintf(stderr, Name ": %s misses out due to wrong homehost\n",
356 devname);
357 goto loop;
358 } else { /* reject all those sofar */
359 mddev_dev_t td;
360 if ((inargv && verbose >= 0) || verbose > 0)
361 fprintf(stderr, Name ": %s overrides previous devices due to good homehost\n",
362 devname);
363 for (td=devlist; td != tmpdev; td=td->next)
364 if (td->used == 1)
365 td->used = 0;
366 tmpdev->used = 1;
367 goto loop;
368 }
369 }
370 }
371 fprintf(stderr, Name ": superblock on %s doesn't match others - assembly aborted\n",
372 devname);
373 tst->ss->free_super(tst);
374 st->ss->free_super(st);
375 return 1;
376 }
377
378 tmpdev->used = 1;
379
380 loop:
381 if (tst)
382 tst->ss->free_super(tst);
383 }
384
385 if (!st || !st->sb || !content)
386 return 2;
387
388 /* Now need to open array the device. Use create_mddev */
389 if (content == &info)
390 st->ss->getinfo_super(st, content);
391
392 trustworthy = FOREIGN;
393 switch (st->ss->match_home(st, homehost)) {
394 case 0:
395 trustworthy = FOREIGN;
396 name = content->name;
397 break;
398 case 1:
399 trustworthy = LOCAL;
400 name = strchr(content->name, ':');
401 if (name)
402 name++;
403 else
404 name = content->name;
405 break;
406 case -1:
407 trustworthy = FOREIGN;
408 break;
409 }
410 if (!auto_assem && trustworthy == FOREIGN)
411 /* If the array is listed in mdadm or on
412 * command line, then we trust the name
413 * even if the array doesn't look local
414 */
415 trustworthy = LOCAL;
416
417 if (content->name[0] == 0 &&
418 content->array.level == LEVEL_CONTAINER) {
419 name = content->text_version;
420 trustworthy = METADATA;
421 }
422 mdfd = create_mddev(mddev, name, ident->autof, trustworthy,
423 chosen_name);
424 if (mdfd < 0) {
425 st->ss->free_super(st);
426 free(devices);
427 if (auto_assem)
428 goto try_again;
429 return 1;
430 }
431 mddev = chosen_name;
432 vers = md_get_version(mdfd);
433 if (vers < 9000) {
434 fprintf(stderr, Name ": Assemble requires driver version 0.90.0 or later.\n"
435 " Upgrade your kernel or try --build\n");
436 close(mdfd);
437 return 1;
438 }
439 if (ioctl(mdfd, GET_ARRAY_INFO, &tmp_inf)==0) {
440 fprintf(stderr, Name ": %s already active, cannot restart it!\n",
441 mddev);
442 for (tmpdev = devlist ;
443 tmpdev && tmpdev->used != 1;
444 tmpdev = tmpdev->next)
445 ;
446 if (tmpdev && auto_assem)
447 fprintf(stderr, Name ": %s needed for %s...\n",
448 mddev, tmpdev->devname);
449 close(mdfd);
450 mdfd = -3;
451 st->ss->free_super(st);
452 free(devices);
453 if (auto_assem)
454 goto try_again;
455 return 1;
456 }
457 ioctl(mdfd, STOP_ARRAY, NULL); /* just incase it was started but has no content */
458
459 /* Ok, no bad inconsistancy, we can try updating etc */
460 bitmap_done = 0;
461 for (tmpdev = devlist; tmpdev; tmpdev=tmpdev->next) if (tmpdev->used == 1) {
462 char *devname = tmpdev->devname;
463 struct stat stb;
464 /* looks like a good enough match to update the super block if needed */
465 #ifndef MDASSEMBLE
466 if (update) {
467 int dfd;
468 /* prepare useful information in info structures */
469 struct stat stb2;
470 struct supertype *tst;
471 fstat(mdfd, &stb2);
472
473 if (strcmp(update, "uuid")==0 &&
474 !ident->uuid_set) {
475 int rfd;
476 if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
477 read(rfd, ident->uuid, 16) != 16) {
478 *(__u32*)(ident->uuid) = random();
479 *(__u32*)(ident->uuid+1) = random();
480 *(__u32*)(ident->uuid+2) = random();
481 *(__u32*)(ident->uuid+3) = random();
482 }
483 if (rfd >= 0) close(rfd);
484 }
485 dfd = dev_open(devname, O_RDWR|O_EXCL);
486
487 remove_partitions(dfd);
488
489 tst = dup_super(st);
490 tst->ss->load_super(tst, dfd, NULL);
491 tst->ss->getinfo_super(tst, content);
492
493 memcpy(content->uuid, ident->uuid, 16);
494 strcpy(content->name, ident->name);
495 content->array.md_minor = minor(stb2.st_rdev);
496
497 tst->ss->update_super(tst, content, update,
498 devname, verbose,
499 ident->uuid_set, homehost);
500 if (strcmp(update, "uuid")==0 &&
501 !ident->uuid_set) {
502 ident->uuid_set = 1;
503 memcpy(ident->uuid, content->uuid, 16);
504 }
505 if (dfd < 0)
506 fprintf(stderr, Name ": Cannot open %s for superblock update\n",
507 devname);
508 else if (tst->ss->store_super(tst, dfd))
509 fprintf(stderr, Name ": Could not re-write superblock on %s.\n",
510 devname);
511 if (dfd >= 0)
512 close(dfd);
513
514 if (strcmp(update, "uuid")==0 &&
515 ident->bitmap_fd >= 0 && !bitmap_done) {
516 if (bitmap_update_uuid(ident->bitmap_fd,
517 content->uuid,
518 tst->ss->swapuuid) != 0)
519 fprintf(stderr, Name ": Could not update uuid on external bitmap.\n");
520 else
521 bitmap_done = 1;
522 }
523 tst->ss->free_super(tst);
524 } else
525 #endif
526 {
527 struct supertype *tst = dup_super(st);
528 int dfd;
529 dfd = dev_open(devname, O_RDWR|O_EXCL);
530
531 remove_partitions(dfd);
532
533 tst->ss->load_super(tst, dfd, NULL);
534 tst->ss->getinfo_super(tst, content);
535 tst->ss->free_super(tst);
536 close(dfd);
537 }
538
539 stat(devname, &stb);
540
541 if (verbose > 0)
542 fprintf(stderr, Name ": %s is identified as a member of %s, slot %d.\n",
543 devname, mddev, content->disk.raid_disk);
544 devices[devcnt].devname = devname;
545 devices[devcnt].uptodate = 0;
546 devices[devcnt].i = *content;
547 devices[devcnt].i.disk.major = major(stb.st_rdev);
548 devices[devcnt].i.disk.minor = minor(stb.st_rdev);
549 if (most_recent < devcnt) {
550 if (devices[devcnt].i.events
551 > devices[most_recent].i.events)
552 most_recent = devcnt;
553 }
554 if (content->array.level == -4)
555 /* with multipath, the raid_disk from the superblock is meaningless */
556 i = devcnt;
557 else
558 i = devices[devcnt].i.disk.raid_disk;
559 if (i+1 == 0) {
560 if (nextspare < content->array.raid_disks)
561 nextspare = content->array.raid_disks;
562 i = nextspare++;
563 } else {
564 if (i >= content->array.raid_disks &&
565 i >= nextspare)
566 nextspare = i+1;
567 }
568 if (i < 10000) {
569 if (i >= bestcnt) {
570 unsigned int newbestcnt = i+10;
571 int *newbest = malloc(sizeof(int)*newbestcnt);
572 unsigned int c;
573 for (c=0; c < newbestcnt; c++)
574 if (c < bestcnt)
575 newbest[c] = best[c];
576 else
577 newbest[c] = -1;
578 if (best)free(best);
579 best = newbest;
580 bestcnt = newbestcnt;
581 }
582 if (best[i] >=0 &&
583 devices[best[i]].i.events
584 == devices[devcnt].i.events
585 && (devices[best[i]].i.disk.minor
586 != devices[devcnt].i.disk.minor)
587 && st->ss == &super0
588 && content->array.level != LEVEL_MULTIPATH) {
589 /* two different devices with identical superblock.
590 * Could be a mis-detection caused by overlapping
591 * partitions. fail-safe.
592 */
593 fprintf(stderr, Name ": WARNING %s and %s appear"
594 " to have very similar superblocks.\n"
595 " If they are really different, "
596 "please --zero the superblock on one\n"
597 " If they are the same or overlap,"
598 " please remove one from %s.\n",
599 devices[best[i]].devname, devname,
600 inargv ? "the list" :
601 "the\n DEVICE list in mdadm.conf"
602 );
603 close(mdfd);
604 return 1;
605 }
606 if (best[i] == -1
607 || (devices[best[i]].i.events
608 < devices[devcnt].i.events))
609 best[i] = devcnt;
610 }
611 devcnt++;
612 }
613
614 if (devcnt == 0) {
615 fprintf(stderr, Name ": no devices found for %s\n",
616 mddev);
617 if (st)
618 st->ss->free_super(st);
619 close(mdfd);
620 return 1;
621 }
622
623 if (update && strcmp(update, "byteorder")==0)
624 st->minor_version = 90;
625
626 st->ss->getinfo_super(st, content);
627 clean = content->array.state & 1;
628
629 /* now we have some devices that might be suitable.
630 * I wonder how many
631 */
632 avail = malloc(content->array.raid_disks);
633 memset(avail, 0, content->array.raid_disks);
634 okcnt = 0;
635 sparecnt=0;
636 for (i=0; i< bestcnt ;i++) {
637 int j = best[i];
638 int event_margin = 1; /* always allow a difference of '1'
639 * like the kernel does
640 */
641 if (j < 0) continue;
642 /* note: we ignore error flags in multipath arrays
643 * as they don't make sense
644 */
645 if (content->array.level != -4)
646 if (!(devices[j].i.disk.state & (1<<MD_DISK_SYNC))) {
647 if (!(devices[j].i.disk.state
648 & (1<<MD_DISK_FAULTY)))
649 sparecnt++;
650 continue;
651 }
652 if (devices[j].i.events+event_margin >=
653 devices[most_recent].i.events) {
654 devices[j].uptodate = 1;
655 if (i < content->array.raid_disks) {
656 okcnt++;
657 avail[i]=1;
658 } else
659 sparecnt++;
660 }
661 }
662 while (force && !enough(content->array.level, content->array.raid_disks,
663 content->array.layout, 1,
664 avail, okcnt)) {
665 /* Choose the newest best drive which is
666 * not up-to-date, update the superblock
667 * and add it.
668 */
669 int fd;
670 struct supertype *tst;
671 long long current_events;
672 chosen_drive = -1;
673 for (i=0; i<content->array.raid_disks && i < bestcnt; i++) {
674 int j = best[i];
675 if (j>=0 &&
676 !devices[j].uptodate &&
677 devices[j].i.events > 0 &&
678 (chosen_drive < 0 ||
679 devices[j].i.events
680 > devices[chosen_drive].i.events))
681 chosen_drive = j;
682 }
683 if (chosen_drive < 0)
684 break;
685 current_events = devices[chosen_drive].i.events;
686 add_another:
687 if (verbose >= 0)
688 fprintf(stderr, Name ": forcing event count in %s(%d) from %d upto %d\n",
689 devices[chosen_drive].devname,
690 devices[chosen_drive].i.disk.raid_disk,
691 (int)(devices[chosen_drive].i.events),
692 (int)(devices[most_recent].i.events));
693 fd = dev_open(devices[chosen_drive].devname, O_RDWR|O_EXCL);
694 if (fd < 0) {
695 fprintf(stderr, Name ": Couldn't open %s for write - not updating\n",
696 devices[chosen_drive].devname);
697 devices[chosen_drive].i.events = 0;
698 continue;
699 }
700 tst = dup_super(st);
701 if (tst->ss->load_super(tst,fd, NULL)) {
702 close(fd);
703 fprintf(stderr, Name ": RAID superblock disappeared from %s - not updating.\n",
704 devices[chosen_drive].devname);
705 devices[chosen_drive].i.events = 0;
706 continue;
707 }
708 content->events = devices[most_recent].i.events;
709 tst->ss->update_super(tst, content, "force-one",
710 devices[chosen_drive].devname, verbose,
711 0, NULL);
712
713 if (tst->ss->store_super(tst, fd)) {
714 close(fd);
715 fprintf(stderr, Name ": Could not re-write superblock on %s\n",
716 devices[chosen_drive].devname);
717 devices[chosen_drive].i.events = 0;
718 tst->ss->free_super(tst);
719 continue;
720 }
721 close(fd);
722 devices[chosen_drive].i.events = devices[most_recent].i.events;
723 devices[chosen_drive].uptodate = 1;
724 avail[chosen_drive] = 1;
725 okcnt++;
726 tst->ss->free_super(tst);
727
728 /* If there are any other drives of the same vintage,
729 * add them in as well. We can't lose and we might gain
730 */
731 for (i=0; i<content->array.raid_disks && i < bestcnt ; i++) {
732 int j = best[i];
733 if (j >= 0 &&
734 !devices[j].uptodate &&
735 devices[j].i.events > 0 &&
736 devices[j].i.events == current_events) {
737 chosen_drive = j;
738 goto add_another;
739 }
740 }
741 }
742
743 /* Now we want to look at the superblock which the kernel will base things on
744 * and compare the devices that we think are working with the devices that the
745 * superblock thinks are working.
746 * If there are differences and --force is given, then update this chosen
747 * superblock.
748 */
749 chosen_drive = -1;
750 st->ss->free_super(st);
751 for (i=0; chosen_drive < 0 && i<bestcnt; i++) {
752 int j = best[i];
753 int fd;
754
755 if (j<0)
756 continue;
757 if (!devices[j].uptodate)
758 continue;
759 chosen_drive = j;
760 if ((fd=dev_open(devices[j].devname, O_RDONLY|O_EXCL))< 0) {
761 fprintf(stderr, Name ": Cannot open %s: %s\n",
762 devices[j].devname, strerror(errno));
763 close(mdfd);
764 return 1;
765 }
766 if (st->ss->load_super(st,fd, NULL)) {
767 close(fd);
768 fprintf(stderr, Name ": RAID superblock has disappeared from %s\n",
769 devices[j].devname);
770 close(mdfd);
771 return 1;
772 }
773 close(fd);
774 }
775 if (st->sb == NULL) {
776 fprintf(stderr, Name ": No suitable drives found for %s\n", mddev);
777 close(mdfd);
778 return 1;
779 }
780 st->ss->getinfo_super(st, content);
781 #ifndef MDASSEMBLE
782 sysfs_init(content, mdfd, 0);
783 #endif
784 for (i=0; i<bestcnt; i++) {
785 int j = best[i];
786 unsigned int desired_state;
787
788 if (i < content->array.raid_disks)
789 desired_state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC);
790 else
791 desired_state = 0;
792
793 if (j<0)
794 continue;
795 if (!devices[j].uptodate)
796 continue;
797
798 devices[j].i.disk.state = desired_state;
799
800 if (st->ss->update_super(st, &devices[j].i, "assemble", NULL,
801 verbose, 0, NULL)) {
802 if (force) {
803 if (verbose >= 0)
804 fprintf(stderr, Name ": "
805 "clearing FAULTY flag for device %d in %s for %s\n",
806 j, mddev, devices[j].devname);
807 change = 1;
808 } else {
809 if (verbose >= -1)
810 fprintf(stderr, Name ": "
811 "device %d in %s has wrong state in superblock, but %s seems ok\n",
812 i, mddev, devices[j].devname);
813 }
814 }
815 #if 0
816 if (!(super.disks[i].i.disk.state & (1 << MD_DISK_FAULTY))) {
817 fprintf(stderr, Name ": devices %d of %s is not marked FAULTY in superblock, but cannot be found\n",
818 i, mddev);
819 }
820 #endif
821 }
822 if (force && !clean &&
823 !enough(content->array.level, content->array.raid_disks,
824 content->array.layout, clean,
825 avail, okcnt)) {
826 change += st->ss->update_super(st, content, "force-array",
827 devices[chosen_drive].devname, verbose,
828 0, NULL);
829 clean = 1;
830 }
831
832 if (change) {
833 int fd;
834 fd = dev_open(devices[chosen_drive].devname, O_RDWR|O_EXCL);
835 if (fd < 0) {
836 fprintf(stderr, Name ": Could not open %s for write - cannot Assemble array.\n",
837 devices[chosen_drive].devname);
838 close(mdfd);
839 return 1;
840 }
841 if (st->ss->store_super(st, fd)) {
842 close(fd);
843 fprintf(stderr, Name ": Could not re-write superblock on %s\n",
844 devices[chosen_drive].devname);
845 close(mdfd);
846 return 1;
847 }
848 close(fd);
849 }
850
851 /* If we are in the middle of a reshape we may need to restore saved data
852 * that was moved aside due to the reshape overwriting live data
853 * The code of doing this lives in Grow.c
854 */
855 #ifndef MDASSEMBLE
856 if (content->reshape_active) {
857 int err = 0;
858 int *fdlist = malloc(sizeof(int)* bestcnt);
859 for (i=0; i<bestcnt; i++) {
860 int j = best[i];
861 if (j >= 0) {
862 fdlist[i] = dev_open(devices[j].devname, O_RDWR|O_EXCL);
863 if (fdlist[i] < 0) {
864 fprintf(stderr, Name ": Could not open %s for write - cannot Assemble array.\n",
865 devices[j].devname);
866 err = 1;
867 break;
868 }
869 } else
870 fdlist[i] = -1;
871 }
872 if (!err)
873 err = Grow_restart(st, content, fdlist, bestcnt, backup_file);
874 while (i>0) {
875 i--;
876 if (fdlist[i]>=0) close(fdlist[i]);
877 }
878 if (err) {
879 fprintf(stderr, Name ": Failed to restore critical section for reshape, sorry.\n");
880 close(mdfd);
881 return err;
882 }
883 }
884 #endif
885 /* count number of in-sync devices according to the superblock.
886 * We must have this number to start the array without -s or -R
887 */
888 req_cnt = content->array.working_disks;
889
890 /* Almost ready to actually *do* something */
891 if (!old_linux) {
892 int rv;
893
894 /* First, fill in the map, so that udev can find our name
895 * as soon as we become active.
896 */
897 map_update(NULL, fd2devnum(mdfd), content->text_version,
898 content->uuid, chosen_name);
899
900 rv = set_array_info(mdfd, st, content);
901 if (rv) {
902 fprintf(stderr, Name ": failed to set array info for %s: %s\n",
903 mddev, strerror(errno));
904 close(mdfd);
905 return 1;
906 }
907 if (ident->bitmap_fd >= 0) {
908 if (ioctl(mdfd, SET_BITMAP_FILE, ident->bitmap_fd) != 0) {
909 fprintf(stderr, Name ": SET_BITMAP_FILE failed.\n");
910 close(mdfd);
911 return 1;
912 }
913 } else if (ident->bitmap_file) {
914 /* From config file */
915 int bmfd = open(ident->bitmap_file, O_RDWR);
916 if (bmfd < 0) {
917 fprintf(stderr, Name ": Could not open bitmap file %s\n",
918 ident->bitmap_file);
919 close(mdfd);
920 return 1;
921 }
922 if (ioctl(mdfd, SET_BITMAP_FILE, bmfd) != 0) {
923 fprintf(stderr, Name ": Failed to set bitmapfile for %s\n", mddev);
924 close(bmfd);
925 close(mdfd);
926 return 1;
927 }
928 close(bmfd);
929 }
930
931 /* First, add the raid disks, but add the chosen one last */
932 for (i=0; i<= bestcnt; i++) {
933 int j;
934 if (i < bestcnt) {
935 j = best[i];
936 if (j == chosen_drive)
937 continue;
938 } else
939 j = chosen_drive;
940
941 if (j >= 0 /* && devices[j].uptodate */) {
942 rv = add_disk(mdfd, st, content, &devices[j].i);
943
944 if (rv) {
945 fprintf(stderr, Name ": failed to add "
946 "%s to %s: %s\n",
947 devices[j].devname,
948 mddev,
949 strerror(errno));
950 if (i < content->array.raid_disks
951 || i == bestcnt)
952 okcnt--;
953 else
954 sparecnt--;
955 } else if (verbose > 0)
956 fprintf(stderr, Name ": added %s "
957 "to %s as %d\n",
958 devices[j].devname, mddev,
959 devices[j].i.disk.raid_disk);
960 } else if (verbose > 0 && i < content->array.raid_disks)
961 fprintf(stderr, Name ": no uptodate device for "
962 "slot %d of %s\n",
963 i, mddev);
964 }
965
966 if (content->array.level == LEVEL_CONTAINER) {
967 if (verbose >= 0) {
968 fprintf(stderr, Name ": Container %s has been "
969 "assembled with %d drive%s",
970 mddev, okcnt+sparecnt, okcnt+sparecnt==1?"":"s");
971 if (okcnt < content->array.raid_disks)
972 fprintf(stderr, " (out of %d)",
973 content->array.raid_disks);
974 fprintf(stderr, "\n");
975 }
976 sysfs_uevent(content, "change");
977 close(mdfd);
978 return 0;
979 }
980
981 if (runstop == 1 ||
982 (runstop <= 0 &&
983 ( enough(content->array.level, content->array.raid_disks,
984 content->array.layout, clean, avail, okcnt) &&
985 (okcnt >= req_cnt || start_partial_ok)
986 ))) {
987 if (ioctl(mdfd, RUN_ARRAY, NULL)==0) {
988 if (verbose >= 0) {
989 fprintf(stderr, Name ": %s has been started with %d drive%s",
990 mddev, okcnt, okcnt==1?"":"s");
991 if (okcnt < content->array.raid_disks)
992 fprintf(stderr, " (out of %d)", content->array.raid_disks);
993 if (sparecnt)
994 fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s");
995 fprintf(stderr, ".\n");
996 }
997 close(mdfd);
998 if (auto_assem) {
999 int usecs = 1;
1000 /* There is a nasty race with 'mdadm --monitor'.
1001 * If it opens this device before we close it,
1002 * it gets an incomplete open on which IO
1003 * doesn't work and the capacity is
1004 * wrong.
1005 * If we reopen (to check for layered devices)
1006 * before --monitor closes, we loose.
1007 *
1008 * So: wait upto 1 second for there to be
1009 * a non-zero capacity.
1010 */
1011 while (usecs < 1000) {
1012 mdfd = open(mddev, O_RDONLY);
1013 if (mdfd >= 0) {
1014 unsigned long long size;
1015 if (get_dev_size(mdfd, NULL, &size) &&
1016 size > 0)
1017 break;
1018 close(mdfd);
1019 }
1020 usleep(usecs);
1021 usecs <<= 1;
1022 }
1023 }
1024 return 0;
1025 }
1026 fprintf(stderr, Name ": failed to RUN_ARRAY %s: %s\n",
1027 mddev, strerror(errno));
1028
1029 if (!enough(content->array.level, content->array.raid_disks,
1030 content->array.layout, 1, avail, okcnt))
1031 fprintf(stderr, Name ": Not enough devices to "
1032 "start the array.\n");
1033 else if (!enough(content->array.level,
1034 content->array.raid_disks,
1035 content->array.layout, clean,
1036 avail, okcnt))
1037 fprintf(stderr, Name ": Not enough devices to "
1038 "start the array while not clean "
1039 "- consider --force.\n");
1040
1041 if (auto_assem)
1042 ioctl(mdfd, STOP_ARRAY, NULL);
1043 close(mdfd);
1044 return 1;
1045 }
1046 if (runstop == -1) {
1047 fprintf(stderr, Name ": %s assembled from %d drive%s",
1048 mddev, okcnt, okcnt==1?"":"s");
1049 if (okcnt != content->array.raid_disks)
1050 fprintf(stderr, " (out of %d)", content->array.raid_disks);
1051 fprintf(stderr, ", but not started.\n");
1052 close(mdfd);
1053 return 0;
1054 }
1055 if (verbose >= -1) {
1056 fprintf(stderr, Name ": %s assembled from %d drive%s", mddev, okcnt, okcnt==1?"":"s");
1057 if (sparecnt)
1058 fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s");
1059 if (!enough(content->array.level, content->array.raid_disks,
1060 content->array.layout, 1, avail, okcnt))
1061 fprintf(stderr, " - not enough to start the array.\n");
1062 else if (!enough(content->array.level,
1063 content->array.raid_disks,
1064 content->array.layout, clean,
1065 avail, okcnt))
1066 fprintf(stderr, " - not enough to start the "
1067 "array while not clean - consider "
1068 "--force.\n");
1069 else {
1070 if (req_cnt == content->array.raid_disks)
1071 fprintf(stderr, " - need all %d to start it", req_cnt);
1072 else
1073 fprintf(stderr, " - need %d of %d to start", req_cnt, content->array.raid_disks);
1074 fprintf(stderr, " (use --run to insist).\n");
1075 }
1076 }
1077 if (auto_assem)
1078 ioctl(mdfd, STOP_ARRAY, NULL);
1079 return 1;
1080 } else {
1081 /* The "chosen_drive" is a good choice, and if necessary, the superblock has
1082 * been updated to point to the current locations of devices.
1083 * so we can just start the array
1084 */
1085 unsigned long dev;
1086 dev = makedev(devices[chosen_drive].i.disk.major,
1087 devices[chosen_drive].i.disk.minor);
1088 if (ioctl(mdfd, START_ARRAY, dev)) {
1089 fprintf(stderr, Name ": Cannot start array: %s\n",
1090 strerror(errno));
1091 }
1092
1093 }
1094 close(mdfd);
1095 return 0;
1096 }
1097
1098 #ifndef MDASSEMBLE
1099 int assemble_container_content(struct supertype *st, int mdfd,
1100 struct mdinfo *content, int runstop,
1101 char *chosen_name, int verbose)
1102 {
1103 struct mdinfo *dev, *sra;
1104 int working = 0, preexist = 0;
1105 struct map_ent *map = NULL;
1106
1107 sysfs_init(content, mdfd, 0);
1108
1109 sra = sysfs_read(mdfd, 0, GET_VERSION);
1110 if (sra == NULL || strcmp(sra->text_version, content->text_version) != 0)
1111 if (sysfs_set_array(content, md_get_version(mdfd)) != 0)
1112 return 1;
1113 if (sra)
1114 sysfs_free(sra);
1115
1116 for (dev = content->devs; dev; dev = dev->next)
1117 if (sysfs_add_disk(content, dev) == 0)
1118 working++;
1119 else if (errno == EEXIST)
1120 preexist++;
1121 if (working == 0)
1122 /* Nothing new, don't try to start */ ;
1123 else if (runstop > 0 ||
1124 (working + preexist) >= content->array.working_disks) {
1125 switch(content->array.level) {
1126 case LEVEL_LINEAR:
1127 case LEVEL_MULTIPATH:
1128 case 0:
1129 sysfs_set_str(content, NULL, "array_state",
1130 "active");
1131 break;
1132 default:
1133 sysfs_set_str(content, NULL, "array_state",
1134 "readonly");
1135 /* start mdmon if needed. */
1136 if (!mdmon_running(st->container_dev))
1137 start_mdmon(st->container_dev);
1138 ping_monitor(devnum2devname(st->container_dev));
1139 break;
1140 }
1141 sysfs_set_safemode(content, content->safe_mode_delay);
1142 if (verbose >= 0) {
1143 fprintf(stderr, Name
1144 ": Started %s with %d devices",
1145 chosen_name, working + preexist);
1146 if (preexist)
1147 fprintf(stderr, " (%d new)", working);
1148 fprintf(stderr, "\n");
1149 }
1150 /* FIXME should have an O_EXCL and wait for read-auto */
1151 } else
1152 if (verbose >= 0)
1153 fprintf(stderr, Name
1154 ": %s assembled with %d devices but "
1155 "not started\n",
1156 chosen_name, working);
1157 map_update(&map, fd2devnum(mdfd),
1158 content->text_version,
1159 content->uuid, chosen_name);
1160
1161 return 0;
1162 }
1163 #endif
1164