]> git.ipfire.org Git - thirdparty/mdadm.git/blob - Assemble.c
mdmonitor: use MAILFROM to set sendmail envelope sender address
[thirdparty/mdadm.git] / Assemble.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2001-2016 Neil Brown <neilb@suse.com>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@suse.de>
23 */
24
25 #include "mdadm.h"
26 #include "xmalloc.h"
27
28 #include <ctype.h>
29
30 mapping_t assemble_statuses[] = {
31 { "but cannot be started", INCR_NO },
32 { "but not safe to start", INCR_UNSAFE },
33 { "and started", INCR_YES },
34 { NULL, INCR_ALREADY }
35 };
36
37
/**
 * struct assembly_array_info - Summary of an array used when reporting
 *				assembly results.
 * @name: Name of the array, printed at the start of the status line.
 * @new_cnt: Number of member drives that were recently added.
 * @preexist_cnt: Number of member drives that were already present in the
 *		  pre-assembled array.
 * @exp_cnt: Number of known expansion targets.
 *
 * FIXME: @exp_new_cnt for recently added expansion targets.
 */
struct assembly_array_info {
	char *name;
	int new_cnt;
	int preexist_cnt;
	int exp_cnt;
};
53
54 /**
55 * set_array_assembly_status() - generate status of assembly for an array.
56 * @c: Global settings.
57 * @result: Pointer to status mask.
58 * @status: Status to be set/printed.
59 * @arr: Array information.
60 *
61 * Print status message to user or set it in @result if it is not NULL.
62 */
63 static void set_array_assembly_status(struct context *c,
64 int *result, int status,
65 struct assembly_array_info *arr)
66 {
67 int raid_disks = arr->preexist_cnt + arr->new_cnt;
68 char *status_msg = map_num_s(assemble_statuses, status);
69
70 if (c->export && result)
71 *result |= status;
72
73 if (c->export || c->verbose < 0)
74 return;
75
76 pr_err("%s has been assembled with %d device%s", arr->name,
77 raid_disks, raid_disks == 1 ? "":"s");
78 if (arr->preexist_cnt > 0)
79 fprintf(stderr, " (%d new)", arr->new_cnt);
80 if (arr->exp_cnt)
81 fprintf(stderr, " ( + %d for expansion)", arr->exp_cnt);
82 fprintf(stderr, " %s.\n", status_msg);
83 }
84
/*
 * Decide whether the array name @found satisfies @required.
 *
 * An exact match always succeeds.  Otherwise @found must look like
 * "<host>:<name>" with <name> equal to @required, and the <host> part is
 * accepted when any of the following holds:
 *   - the prefix is literally "any",
 *   - @homehost is "any",
 *   - @require_homehost is zero,
 *   - the prefix equals @homehost.
 *
 * Returns 1 on a match, 0 otherwise.  @homehost may be NULL.
 */
static int name_matches(char *found, char *required, char *homehost, int require_homehost)
{
	char *colon;
	size_t prefix_len;

	if (strcmp(found, required) == 0)
		return 1;

	colon = strchr(found, ':');
	if (colon == NULL)
		return 0;

	/* Whatever the host prefix is, the part after ':' has to match. */
	if (strcmp(colon + 1, required) != 0)
		return 0;

	if (!require_homehost)
		return 1;
	if (strncmp(found, "any:", 4) == 0)
		return 1;
	if (homehost == NULL)
		return 0;
	if (strcmp(homehost, "any") == 0)
		return 1;

	prefix_len = (size_t)(colon - found);
	return strlen(homehost) == prefix_len &&
	       strncmp(found, homehost, prefix_len) == 0;
}
110
111 static int is_member_busy(char *metadata_version)
112 {
113 /* check if the given member array is active */
114 struct mdstat_ent *mdstat = mdstat_read(0, 0);
115 struct mdstat_ent *ent;
116 int busy = 0;
117
118 for (ent = mdstat; ent; ent = ent->next) {
119 if (!is_mdstat_ent_subarray(ent))
120 continue;
121
122 /* Skip first char - it can be '/' or '-' */
123 if (strcmp(&ent->metadata_version[10], metadata_version + 1) == 0) {
124 busy = 1;
125 break;
126 }
127 }
128 free_mdstat(mdstat);
129
130 return busy;
131 }
132
133 static int ident_matches(struct mddev_ident *ident,
134 struct mdinfo *content,
135 struct supertype *tst,
136 char *homehost, int require_homehost,
137 enum update_opt update, char *devname)
138 {
139
140 if (ident->uuid_set && update != UOPT_UUID &&
141 same_uuid(content->uuid, ident->uuid, tst->ss->swapuuid)==0 &&
142 memcmp(content->uuid, uuid_zero, sizeof(int[4])) != 0) {
143 if (devname)
144 pr_err("%s has wrong uuid.\n", devname);
145 return 0;
146 }
147 if (ident->name[0] && update != UOPT_NAME &&
148 name_matches(content->name, ident->name, homehost, require_homehost)==0) {
149 if (devname)
150 pr_err("%s has wrong name.\n", devname);
151 return 0;
152 }
153 if (ident->super_minor != UnSet &&
154 ident->super_minor != content->array.md_minor) {
155 if (devname)
156 pr_err("%s has wrong super-minor.\n",
157 devname);
158 return 0;
159 }
160 if (ident->level != UnSet &&
161 ident->level != content->array.level) {
162 if (devname)
163 pr_err("%s has wrong raid level.\n",
164 devname);
165 return 0;
166 }
167 if (ident->raid_disks != UnSet &&
168 content->array.raid_disks != 0 && /* metadata doesn't know how many to expect */
169 ident->raid_disks!= content->array.raid_disks) {
170 if (devname)
171 pr_err("%s requires wrong number of drives.\n",
172 devname);
173 return 0;
174 }
175 if (ident->member && ident->member[0]) {
176 /* content->text_version must match */
177 char *s = strchr(content->text_version+1, '/');
178 if (s == NULL) {
179 if (devname)
180 pr_err("%s is not a container and one is required.\n",
181 devname);
182 return 0;
183 } else if (strcmp(ident->member, s+1) != 0) {
184 if (devname)
185 pr_err("skipping wrong member %s is %s\n",
186 content->text_version, devname);
187 return 0;
188 }
189 }
190 return 1;
191 }
192
193 static int select_devices(struct mddev_dev *devlist,
194 struct mddev_ident *ident,
195 struct supertype **stp,
196 struct mdinfo **contentp,
197 struct context *c,
198 int inargv, int auto_assem)
199 {
200 struct mddev_dev *tmpdev;
201 int num_devs;
202 struct supertype *st = *stp;
203 struct mdinfo *content = NULL;
204 int report_mismatch = ((inargv && c->verbose >= 0) || c->verbose > 0);
205 struct domainlist *domains = NULL;
206 dev_t rdev;
207
208 tmpdev = devlist; num_devs = 0;
209 while (tmpdev) {
210 if (tmpdev->used)
211 tmpdev->used = 2;
212 else
213 num_devs++;
214 tmpdev->disposition = 0;
215 tmpdev = tmpdev->next;
216 }
217
218 /* first walk the list of devices to find a consistent set
219 * that match the criterea, if that is possible.
220 * We flag the ones we like with 'used'.
221 */
222 for (tmpdev = devlist;
223 tmpdev;
224 tmpdev = tmpdev ? tmpdev->next : NULL) {
225 char *devname = tmpdev->devname;
226 int dfd;
227 struct supertype *tst;
228 struct dev_policy *pol = NULL;
229 int found_container = 0;
230
231 if (tmpdev->used > 1)
232 continue;
233
234 if (ident->container) {
235 if (ident->container[0] == '/' &&
236 !same_dev(ident->container, devname)) {
237 if (report_mismatch)
238 pr_err("%s is not the container required (%s)\n",
239 devname, ident->container);
240 continue;
241 }
242 } else if (ident->devices &&
243 !match_oneof(ident->devices, devname)) {
244 /* Note that we ignore the "device=" identifier if a
245 * "container=" is given. Checking both is unnecessarily
246 * complicated.
247 */
248 if (report_mismatch)
249 pr_err("%s is not one of %s\n", devname, ident->devices);
250 continue;
251 }
252
253 tst = dup_super(st);
254
255 dfd = dev_open(devname, O_RDONLY);
256 if (dfd < 0) {
257 if (report_mismatch)
258 pr_err("cannot open device %s: %s\n",
259 devname, strerror(errno));
260 tmpdev->used = 2;
261 } else if (!fstat_is_blkdev(dfd, devname, &rdev)) {
262 tmpdev->used = 2;
263 } else if (must_be_container(dfd)) {
264 if (st) {
265 /* already found some components, this cannot
266 * be another one.
267 */
268 if (report_mismatch)
269 pr_err("%s is a container, but we are looking for components\n",
270 devname);
271 tmpdev->used = 2;
272 } if (!tst && (tst = super_by_fd(dfd, NULL)) == NULL) {
273 if (report_mismatch)
274 pr_err("not a recognisable container: %s\n",
275 devname);
276 tmpdev->used = 2;
277 } else if (!tst->ss->load_container ||
278 tst->ss->load_container(tst, dfd, NULL)) {
279 if (report_mismatch)
280 pr_err("no correct container type: %s\n",
281 devname);
282 tmpdev->used = 2;
283 } else if (auto_assem &&
284 !conf_test_metadata(tst->ss->name,
285 (pol = devid_policy(rdev)),
286 tst->ss->match_home(tst, c->homehost) == 1)) {
287 if (report_mismatch)
288 pr_err("%s has metadata type %s for which auto-assembly is disabled\n",
289 devname, tst->ss->name);
290 tmpdev->used = 2;
291 } else
292 found_container = 1;
293 } else {
294 if (!tst && (tst = guess_super(dfd)) == NULL) {
295 if (report_mismatch)
296 pr_err("no recogniseable superblock on %s\n",
297 devname);
298 tmpdev->used = 2;
299 } else if ((tst->ignore_hw_compat = 0),
300 tst->ss->load_super(tst, dfd,
301 report_mismatch ? devname : NULL)) {
302 if (report_mismatch)
303 pr_err("no RAID superblock on %s\n",
304 devname);
305 tmpdev->used = 2;
306 } else if (tst->ss->compare_super == NULL) {
307 if (report_mismatch)
308 pr_err("Cannot assemble %s metadata on %s\n",
309 tst->ss->name, devname);
310 tmpdev->used = 2;
311 } else if (auto_assem && st == NULL &&
312 !conf_test_metadata(tst->ss->name,
313 (pol = devid_policy(rdev)),
314 tst->ss->match_home(tst, c->homehost) == 1)) {
315 if (report_mismatch)
316 pr_err("%s has metadata type %s for which auto-assembly is disabled\n",
317 devname, tst->ss->name);
318 tmpdev->used = 2;
319 }
320 }
321 if (dfd >= 0) close(dfd);
322 if (tmpdev->used == 2) {
323 if (auto_assem || !inargv)
324 /* Ignore unrecognised devices during auto-assembly */
325 goto loop;
326 if (ident->name[0] ||
327 ident->super_minor != UnSet)
328 /* Ignore unrecognised device if looking for
329 * specific array */
330 goto loop;
331 if (ident->uuid_set)
332 /* ignore unrecognized device if looking for
333 * specific uuid
334 */
335 goto loop;
336
337 pr_err("%s has no superblock - assembly aborted\n",
338 devname);
339 if (st)
340 st->ss->free_super(st);
341 dev_policy_free(pol);
342 domain_free(domains);
343 if (tst) {
344 tst->ss->free_super(tst);
345 free(tst);
346 }
347 return -1;
348 }
349
350 if (found_container) {
351 /* tmpdev is a container. We need to be either
352 * looking for a member, or auto-assembling
353 */
354 /* should be safe to try an exclusive open now, we
355 * have rejected anything that some other mdadm might
356 * be looking at
357 */
358 dfd = dev_open(devname, O_RDONLY | O_EXCL);
359 if (dfd < 0) {
360 if (report_mismatch)
361 pr_err("%s is busy - skipping\n", devname);
362 goto loop;
363 }
364 close(dfd);
365
366 if (ident->container && ident->container[0] != '/') {
367 /* we have a uuid */
368 int uuid[4];
369
370 content = *contentp;
371 tst->ss->getinfo_super(tst, content, NULL);
372
373 if (!parse_uuid(ident->container, uuid) ||
374 !same_uuid(content->uuid, uuid, tst->ss->swapuuid)) {
375 if (report_mismatch)
376 pr_err("%s has wrong UUID to be required container\n",
377 devname);
378 goto loop;
379 }
380 }
381 /* It is worth looking inside this container.
382 */
383 if (c->verbose > 0)
384 pr_err("looking in container %s\n",
385 devname);
386
387 for (content = tst->ss->container_content(tst, NULL);
388 content;
389 content = content->next) {
390
391 if (!ident_matches(ident, content, tst,
392 c->homehost, c->require_homehost,
393 c->update,
394 report_mismatch ? devname : NULL))
395 /* message already printed */;
396 else if (is_member_busy(content->text_version)) {
397 if (report_mismatch)
398 pr_err("member %s in %s is already assembled\n",
399 content->text_version,
400 devname);
401 } else if (content->array.state & (1<<MD_SB_BLOCK_VOLUME)) {
402 /* do not assemble arrays with unsupported configurations */
403 pr_err("Cannot activate member %s in %s.\n",
404 content->text_version,
405 devname);
406 } else
407 break;
408 }
409 if (!content) {
410 tmpdev->used = 2;
411 goto loop; /* empty container */
412 }
413
414 st = tst; tst = NULL;
415 if (!auto_assem && inargv && tmpdev->next != NULL) {
416 pr_err("%s is a container, but is not only device given: confused and aborting\n",
417 devname);
418 st->ss->free_super(st);
419 dev_policy_free(pol);
420 domain_free(domains);
421 free(st);
422 return -1;
423 }
424 if (c->verbose > 0)
425 pr_err("found match on member %s in %s\n",
426 content->text_version, devname);
427
428 /* make sure we finished the loop */
429 tmpdev = NULL;
430 goto loop;
431 } else {
432 content = *contentp;
433 tst->ss->getinfo_super(tst, content, NULL);
434
435 if (!ident_matches(ident, content, tst,
436 c->homehost, c->require_homehost,
437 c->update,
438 report_mismatch ? devname : NULL))
439 goto loop;
440
441 if (auto_assem) {
442 /* Never auto-assemble things that conflict
443 * with mdadm.conf in some way
444 */
445 struct mddev_ident *match;
446 int rv = 0;
447
448 match = conf_match(tst, content, devname,
449 report_mismatch ? c->verbose : -1,
450 &rv);
451 if (!match && rv == 2)
452 goto loop;
453 if (match && match->devname &&
454 strcasecmp(match->devname, "<ignore>") == 0) {
455 if (report_mismatch)
456 pr_err("%s is a member of an explicitly ignored array\n",
457 devname);
458 goto loop;
459 }
460 if (match && !ident_matches(match, content, tst,
461 c->homehost, c->require_homehost,
462 c->update,
463 report_mismatch ? devname : NULL))
464 /* Array exists in mdadm.conf but some
465 * details don't match, so reject it
466 */
467 goto loop;
468 }
469
470 /* should be safe to try an exclusive open now, we
471 * have rejected anything that some other mdadm might
472 * be looking at
473 */
474 dfd = dev_open(devname, O_RDONLY | O_EXCL);
475 if (dfd < 0) {
476 if (report_mismatch)
477 pr_err("%s is busy - skipping\n", devname);
478 goto loop;
479 }
480 close(dfd);
481
482 if (st == NULL)
483 st = dup_super(tst);
484 if (st->minor_version == -1)
485 st->minor_version = tst->minor_version;
486
487 if (memcmp(content->uuid, uuid_zero,
488 sizeof(int[4])) == 0) {
489 /* this is a floating spare. It cannot define
490 * an array unless there are no more arrays of
491 * this type to be found. It can be included
492 * in an array of this type though.
493 */
494 tmpdev->used = 3;
495 goto loop;
496 }
497
498 if (st->ss != tst->ss ||
499 st->minor_version != tst->minor_version ||
500 st->ss->compare_super(st, tst, 1) != 0) {
501 /* Some mismatch. If exactly one array matches this host,
502 * we can resolve on that one.
503 * Or, if we are auto assembling, we just ignore the second
504 * for now.
505 */
506 if (auto_assem)
507 goto loop;
508 if (c->homehost) {
509 int first = st->ss->match_home(st, c->homehost);
510 int last = tst->ss->match_home(tst, c->homehost);
511 if (first != last &&
512 (first == 1 || last == 1)) {
513 /* We can do something */
514 if (first) {/* just ignore this one */
515 if (report_mismatch)
516 pr_err("%s misses out due to wrong homehost\n",
517 devname);
518 goto loop;
519 } else { /* reject all those sofar */
520 struct mddev_dev *td;
521 if (report_mismatch)
522 pr_err("%s overrides previous devices due to good homehost\n",
523 devname);
524 for (td=devlist; td != tmpdev; td=td->next)
525 if (td->used == 1)
526 td->used = 0;
527 tmpdev->used = 1;
528 goto loop;
529 }
530 }
531 }
532 pr_err("superblock on %s doesn't match others - assembly aborted\n",
533 devname);
534 tst->ss->free_super(tst);
535 st->ss->free_super(st);
536 dev_policy_free(pol);
537 domain_free(domains);
538 free(tst);
539 return -1;
540 }
541 tmpdev->used = 1;
542 }
543 loop:
544 /* Collect domain information from members only */
545 if (tmpdev && tmpdev->used == 1) {
546 if (!pol)
547 pol = devid_policy(rdev);
548 domain_merge(&domains, pol, tst?tst->ss->name:NULL);
549 }
550 dev_policy_free(pol);
551 pol = NULL;
552 if (tst) {
553 tst->ss->free_super(tst);
554 free(tst);
555 }
556 }
557
558 /* Check if we found some imsm spares but no members */
559 if ((auto_assem ||
560 (ident->uuid_set &&
561 memcmp(uuid_zero, ident->uuid,sizeof(uuid_zero)) == 0)) &&
562 (!st || !st->sb))
563 for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) {
564 if (tmpdev->used != 3)
565 continue;
566 tmpdev->used = 1;
567 content = *contentp;
568
569 if (!st)
570 return -1;
571
572 if (!st->sb) {
573 /* we need sb from one of the spares */
574 int dfd = dev_open(tmpdev->devname, O_RDONLY);
575 if (dfd < 0 ||
576 st->ss->load_super(st, dfd, NULL))
577 tmpdev->used = 2;
578 close_fd(&dfd);
579 }
580 }
581
582 /* Now reject spares that don't match domains of identified members */
583 for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) {
584 if (tmpdev->used != 3)
585 continue;
586 if (!stat_is_blkdev(tmpdev->devname, &rdev)) {
587 tmpdev->used = 2;
588 } else {
589 struct dev_policy *pol = devid_policy(rdev);
590 int dt = domain_test(domains, pol, NULL);
591 if (inargv && dt != 0)
592 /* take this spare as domains match
593 * if there are any */
594 tmpdev->used = 1;
595 else if (!inargv && dt == 1)
596 /* device wasn't explicitly listed, so need
597 * explicit domain match - which we have */
598 tmpdev->used = 1;
599 else
600 /* if domains don't match mark as unused */
601 tmpdev->used = 0;
602 dev_policy_free(pol);
603 }
604 }
605 domain_free(domains);
606 *stp = st;
607 if (st && st->sb && content == *contentp)
608 st->ss->getinfo_super(st, content, NULL);
609 *contentp = content;
610
611 return num_devs;
612 }
613
614 struct devs {
615 char *devname;
616 int uptodate; /* set once we decide that this device is as
617 * recent as everything else in the array.
618 */
619 int included; /* set if the device is already in the array
620 * due to a previous '-I'
621 */
622 struct mdinfo i;
623 };
624
625 static int load_devices(struct devs *devices, char *devmap,
626 struct mddev_ident *ident, struct supertype **stp,
627 struct mddev_dev *devlist, struct context *c,
628 struct mdinfo *content,
629 int mdfd, char *mddev,
630 int *most_recentp, int *bestcntp, int **bestp,
631 int inargv)
632 {
633 struct mddev_dev *tmpdev;
634 int devcnt = 0;
635 int nextspare = 0;
636 int most_recent = -1;
637 int bestcnt = 0;
638 int *best = *bestp;
639 struct supertype *st = *stp;
640
641 for (tmpdev = devlist; tmpdev; tmpdev=tmpdev->next) {
642 char *devname = tmpdev->devname;
643 struct stat stb;
644 struct supertype *tst;
645 int i;
646 int dfd;
647 int disk_state;
648
649 if (tmpdev->used != 1)
650 continue;
651 /* looks like a good enough match to update the super block if needed */
652 if (c->update) {
653 /* prepare useful information in info structures */
654 struct stat stb2;
655 int err;
656
657 if (fstat(mdfd, &stb2) != 0)
658 goto error;
659
660 if (c->update == UOPT_UUID && !ident->uuid_set)
661 random_uuid((__u8 *)ident->uuid);
662
663 if (c->update == UOPT_PPL && ident->btype != BitmapNone) {
664 pr_err("PPL is not compatible with bitmap\n");
665 close(mdfd);
666 free(devices);
667 free(devmap);
668 return -1;
669 }
670
671 dfd = dev_open(devname,
672 tmpdev->disposition == 'I'
673 ? O_RDWR : (O_RDWR|O_EXCL));
674
675 tst = dup_super(st);
676 if (dfd < 0 || tst->ss->load_super(tst, dfd, NULL) != 0) {
677 pr_err("cannot re-read metadata from %s - aborting\n",
678 devname);
679 if (dfd >= 0)
680 close(dfd);
681 tst->ss->free_super(tst);
682 free(tst);
683 *stp = st;
684 goto error;
685 }
686 tst->ss->getinfo_super(tst, content, devmap + devcnt * content->array.raid_disks);
687
688 memcpy(content->uuid, ident->uuid, 16);
689 strcpy(content->name, ident->name);
690 content->array.md_minor = minor(stb2.st_rdev);
691
692 if (c->update == UOPT_BYTEORDER)
693 err = 0;
694 else if (c->update == UOPT_HOME_CLUSTER) {
695 tst->cluster_name = c->homecluster;
696 err = tst->ss->write_bitmap(tst, dfd, NameUpdate);
697 } else if (c->update == UOPT_NODES) {
698 tst->nodes = c->nodes;
699 err = tst->ss->write_bitmap(tst, dfd, NodeNumUpdate);
700 } else if (c->update == UOPT_REVERT_RESHAPE && c->invalid_backup)
701 err = tst->ss->update_super(tst, content,
702 UOPT_SPEC_REVERT_RESHAPE_NOBACKUP,
703 devname, c->verbose,
704 ident->uuid_set,
705 c->homehost);
706 else
707 err = tst->ss->update_super(tst, content,
708 c->update,
709 devname, c->verbose,
710 ident->uuid_set,
711 c->homehost);
712 if (err < 0) {
713 if (err == -1)
714 pr_err("--update=%s not understood for %s metadata\n",
715 map_num(update_options, c->update), tst->ss->name);
716 tst->ss->free_super(tst);
717 free(tst);
718 close(dfd);
719 *stp = st;
720 goto error;
721 }
722 if (c->update == UOPT_UUID &&
723 !ident->uuid_set) {
724 ident->uuid_set = 1;
725 memcpy(ident->uuid, content->uuid, 16);
726 }
727 if (tst->ss->store_super(tst, dfd))
728 pr_err("Could not re-write superblock on %s.\n",
729 devname);
730 } else {
731 dfd = dev_open(devname,
732 tmpdev->disposition == 'I'
733 ? O_RDWR : (O_RDWR|O_EXCL));
734 tst = dup_super(st);
735
736 if (dfd < 0 || tst->ss->load_super(tst, dfd, NULL) != 0) {
737 pr_err("cannot re-read metadata from %s - aborting\n",
738 devname);
739 if (dfd >= 0)
740 close(dfd);
741 tst->ss->free_super(tst);
742 free(tst);
743 *stp = st;
744 goto error;
745 }
746 tst->ss->getinfo_super(tst, content, devmap + devcnt * content->array.raid_disks);
747 }
748
749 if (fstat(dfd, &stb) != 0) {
750 close(dfd);
751 free(devices);
752 free(devmap);
753 tst->ss->free_super(tst);
754 free(tst);
755 *stp = st;
756 free(best);
757 return -1;
758 }
759 close(dfd);
760
761 if (c->verbose > 0)
762 pr_err("%s is identified as a member of %s, slot %d%s.\n",
763 devname, mddev, content->disk.raid_disk,
764 (content->disk.state & (1<<MD_DISK_REPLACEMENT)) ? " replacement":"");
765 devices[devcnt].devname = devname;
766 devices[devcnt].uptodate = 0;
767 devices[devcnt].included = (tmpdev->disposition == 'I');
768 devices[devcnt].i = *content;
769 devices[devcnt].i.disk.major = major(stb.st_rdev);
770 devices[devcnt].i.disk.minor = minor(stb.st_rdev);
771
772 disk_state = devices[devcnt].i.disk.state & ~((1<<MD_DISK_FAILFAST) |
773 (1<<MD_DISK_WRITEMOSTLY));
774 if (disk_state == ((1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC))) {
775 if (most_recent < 0 ||
776 devices[devcnt].i.events
777 > devices[most_recent].i.events) {
778 struct supertype *tmp = tst;
779 tst = st;
780 st = tmp;
781 most_recent = devcnt;
782 }
783 }
784 tst->ss->free_super(tst);
785 free(tst);
786
787 if (content->array.level == LEVEL_MULTIPATH)
788 /* with multipath, the raid_disk from the superblock is meaningless */
789 i = devcnt;
790 else
791 i = devices[devcnt].i.disk.raid_disk;
792 if (i+1 == 0 || i == MD_DISK_ROLE_JOURNAL) {
793 if (nextspare < content->array.raid_disks*2)
794 nextspare = content->array.raid_disks*2;
795 i = nextspare++;
796 } else {
797 /* i is raid_disk - double it so there is room for
798 * replacements */
799 i *= 2;
800 if (devices[devcnt].i.disk.state & (1<<MD_DISK_REPLACEMENT))
801 i++;
802 if (i >= content->array.raid_disks*2 &&
803 i >= nextspare)
804 nextspare = i+1;
805 }
806 if (i < 10000) {
807 if (i >= bestcnt) {
808 int newbestcnt = i+10;
809 int *newbest = xmalloc(sizeof(int)*newbestcnt);
810 int cc;
811 for (cc = 0; cc < newbestcnt; cc++)
812 if (cc < bestcnt)
813 newbest[cc] = best[cc];
814 else
815 newbest[cc] = -1;
816 if (best)free(best);
817 best = newbest;
818 bestcnt = newbestcnt;
819 }
820 if (best[i] >=0 &&
821 devices[best[i]].i.events ==
822 devices[devcnt].i.events &&
823 (devices[best[i]].i.disk.minor !=
824 devices[devcnt].i.disk.minor) &&
825 st->ss == &super0 &&
826 content->array.level != LEVEL_MULTIPATH) {
827 /* two different devices with identical superblock.
828 * Could be a mis-detection caused by overlapping
829 * partitions. fail-safe.
830 */
831 pr_err("WARNING %s and %s appear to have very similar superblocks.\n"
832 " If they are really different, please --zero the superblock on one\n"
833 " If they are the same or overlap, please remove one from %s.\n",
834 devices[best[i]].devname, devname,
835 inargv ? "the list" :
836 "the\n DEVICE list in mdadm.conf"
837 );
838 *stp = st;
839 goto error;
840 }
841 if (best[i] == -1 || (devices[best[i]].i.events
842 < devices[devcnt].i.events))
843 best[i] = devcnt;
844 else if (st->ss == &super_imsm)
845 best[i+1] = devcnt;
846 }
847 devcnt++;
848 }
849 if (most_recent >= 0)
850 *most_recentp = most_recent;
851 *bestcntp = bestcnt;
852 *bestp = best;
853 *stp = st;
854 return devcnt;
855
856 error:
857 close(mdfd);
858 free(devices);
859 free(devmap);
860 free(best);
861 return -1;
862
863 }
864
865 static int force_array(struct mdinfo *content,
866 struct devs *devices,
867 int *best, int bestcnt, char *avail,
868 int most_recent,
869 struct supertype *st,
870 struct context *c)
871 {
872 int okcnt = 0;
873 while (!enough(content->array.level, content->array.raid_disks,
874 content->array.layout, 1,
875 avail) ||
876 (content->reshape_active && content->delta_disks > 0 &&
877 !enough(content->array.level, (content->array.raid_disks
878 - content->delta_disks),
879 content->new_layout, 1, avail))) {
880 /* Choose the newest best drive which is
881 * not up-to-date, update the superblock
882 * and add it.
883 */
884 int fd;
885 struct supertype *tst;
886 unsigned long long current_events;
887 int chosen_drive = -1;
888 int i;
889
890 for (i = 0;
891 i < content->array.raid_disks * 2 && i < bestcnt;
892 i += 2) {
893 int j = best[i];
894 if (j < 0)
895 continue;
896 if (devices[j].uptodate)
897 continue;
898 if (devices[j].i.recovery_start != MaxSector) {
899 int delta;
900 if (!devices[j].i.reshape_active ||
901 devices[j].i.delta_disks <= 0)
902 continue;
903 /* When increasing number of devices, an
904 * added device also appears to be
905 * recovering. It is safe to include it
906 * as long as it won't be a source of
907 * data.
908 * For now, just allow for last data
909 * devices in RAID4 or last devices in RAID4/5/6.
910 */
911 delta = devices[j].i.delta_disks;
912 if (is_level456(devices[j].i.array.level) &&
913 i/2 >= content->array.raid_disks - delta)
914 /* OK */;
915 else if (devices[j].i.array.level == 4 &&
916 i/2 >= content->array.raid_disks - delta - 1)
917 /* OK */;
918 else
919 continue;
920 } else if (devices[j].i.reshape_active !=
921 content->reshape_active ||
922 (devices[j].i.reshape_active &&
923 devices[j].i.reshape_progress !=
924 content->reshape_progress))
925 /* Here, it may be a source of data. If two
926 * devices claim different progresses, it
927 * means that reshape boundaries differ for
928 * their own devices. Kernel will only treat
929 * the first one as reshape progress and
930 * go on. It may cause disaster, so avoid it.
931 */
932 continue;
933 if (chosen_drive < 0 ||
934 devices[j].i.events
935 > devices[chosen_drive].i.events)
936 chosen_drive = j;
937 }
938 if (chosen_drive < 0)
939 break;
940 current_events = devices[chosen_drive].i.events;
941 add_another:
942 if (c->verbose >= 0)
943 pr_err("forcing event count in %s(%d) from %d up to %d\n",
944 devices[chosen_drive].devname,
945 devices[chosen_drive].i.disk.raid_disk,
946 (int)(devices[chosen_drive].i.events),
947 (int)(devices[most_recent].i.events));
948 fd = dev_open(devices[chosen_drive].devname,
949 devices[chosen_drive].included ? O_RDWR
950 : (O_RDWR|O_EXCL));
951 if (fd < 0) {
952 pr_err("Couldn't open %s for write - not updating\n",
953 devices[chosen_drive].devname);
954 devices[chosen_drive].i.events = 0;
955 continue;
956 }
957 tst = dup_super(st);
958 if (tst->ss->load_super(tst,fd, NULL)) {
959 close(fd);
960 pr_err("RAID superblock disappeared from %s - not updating.\n",
961 devices[chosen_drive].devname);
962 devices[chosen_drive].i.events = 0;
963 continue;
964 }
965 content->events = devices[most_recent].i.events;
966 tst->ss->update_super(tst, content, UOPT_SPEC_FORCE_ONE,
967 devices[chosen_drive].devname, c->verbose,
968 0, NULL);
969
970 if (tst->ss->store_super(tst, fd)) {
971 close(fd);
972 pr_err("Could not re-write superblock on %s\n",
973 devices[chosen_drive].devname);
974 devices[chosen_drive].i.events = 0;
975 tst->ss->free_super(tst);
976 continue;
977 }
978 close(fd);
979 devices[chosen_drive].i.events = devices[most_recent].i.events;
980 devices[chosen_drive].uptodate = 1;
981 avail[chosen_drive] = 1;
982 okcnt++;
983 tst->ss->free_super(tst);
984 /* If there are any other drives of the same vintage,
985 * add them in as well. We can't lose and we might gain
986 */
987 for (i = 0;
988 i < content->array.raid_disks * 2 && i < bestcnt ;
989 i += 2) {
990 int j = best[i];
991 if (j >= 0 &&
992 !devices[j].uptodate &&
993 devices[j].i.recovery_start == MaxSector &&
994 devices[j].i.events == current_events &&
995 ((!devices[j].i.reshape_active &&
996 !content->reshape_active) ||
997 (devices[j].i.reshape_active ==
998 content->reshape_active &&
999 devices[j].i.reshape_progress ==
1000 content->reshape_progress))) {
1001 chosen_drive = j;
1002 goto add_another;
1003 }
1004 }
1005 }
1006 return okcnt;
1007 }
1008
1009 static int start_array(int mdfd,
1010 char *mddev,
1011 struct mdinfo *content,
1012 struct supertype *st,
1013 struct mddev_ident *ident,
1014 int *best, int bestcnt,
1015 int chosen_drive,
1016 struct devs *devices,
1017 unsigned int okcnt,
1018 unsigned int sparecnt,
1019 unsigned int rebuilding_cnt,
1020 unsigned int journalcnt,
1021 struct context *c,
1022 int clean, char *avail,
1023 int start_partial_ok,
1024 int err_ok,
1025 int was_forced
1026 )
1027 {
1028 int rv;
1029 int i;
1030 unsigned int req_cnt;
1031
1032 if (content->journal_device_required && (content->journal_clean == 0)) {
1033 if (!c->force) {
1034 pr_err("Not safe to assemble with missing or stale journal device, consider --force.\n");
1035 return 1;
1036 }
1037 pr_err("Journal is missing or stale, starting array read only.\n");
1038 c->readonly = 1;
1039 }
1040
1041 if (content->consistency_policy == CONSISTENCY_POLICY_PPL)
1042 clean = 1;
1043
1044 rv = set_array_info(mdfd, st, content);
1045 if (rv && !err_ok) {
1046 pr_err("failed to set array info for %s: %s\n",
1047 mddev, strerror(errno));
1048 return 1;
1049 }
1050
1051 /* First, add the raid disks, but add the chosen one last */
1052 for (i = 0; i <= bestcnt; i++) {
1053 int j;
1054 if (i < bestcnt) {
1055 j = best[i];
1056 if (j == chosen_drive)
1057 continue;
1058 } else
1059 j = chosen_drive;
1060
1061 if (j >= 0 && !devices[j].included) {
1062 int dfd;
1063
1064 dfd = dev_open(devices[j].devname, O_RDWR|O_EXCL);
1065 if (dfd >= 0) {
1066 remove_partitions(dfd);
1067 close(dfd);
1068 }
1069 rv = add_disk(mdfd, st, content, &devices[j].i);
1070
1071 if (rv) {
1072 pr_err("failed to add %s to %s: %s\n",
1073 devices[j].devname, mddev,
1074 strerror(errno));
1075 if (errno == EINVAL && content->array.level == 0 &&
1076 content->array.layout != 0) {
1077 cont_err("Possibly your kernel doesn't support RAID0 layouts.\n");
1078 cont_err("Please upgrade.\n");
1079 }
1080 if (i < content->array.raid_disks * 2 ||
1081 i == bestcnt)
1082 okcnt--;
1083 else
1084 sparecnt--;
1085 } else if (c->verbose > 0) {
1086 pr_err("added %s to %s as %d%s%s\n",
1087 devices[j].devname, mddev,
1088 devices[j].i.disk.raid_disk,
1089 devices[j].uptodate?"":
1090 " (possibly out of date)",
1091 (devices[j].i.disk.state &
1092 (1<<MD_DISK_REPLACEMENT)) ?
1093 " replacement":"");
1094 }
1095 } else if (j >= 0) {
1096 if (c->verbose > 0)
1097 pr_err("%s is already in %s as %d\n",
1098 devices[j].devname, mddev,
1099 devices[j].i.disk.raid_disk);
1100 } else if (c->verbose > 0 &&
1101 i < content->array.raid_disks * 2 && (i & 1) == 0)
1102 pr_err("no uptodate device for slot %d of %s\n",
1103 i/2, mddev);
1104 }
1105
1106 if (is_container(content->array.level)) {
1107 sysfs_rules_apply(mddev, content);
1108 if (c->verbose >= 0) {
1109 pr_err("Container %s has been assembled with %d drive%s",
1110 mddev, okcnt + sparecnt + journalcnt,
1111 okcnt + sparecnt + journalcnt == 1 ? "" : "s");
1112 if (okcnt < (unsigned)content->array.raid_disks)
1113 fprintf(stderr, " (out of %d)\n",
1114 content->array.raid_disks);
1115 else
1116 fprintf(stderr, "\n");
1117 }
1118
1119 if (st->ss->validate_container) {
1120 struct mdinfo *devices_list;
1121 struct mdinfo *info_devices;
1122 unsigned int count;
1123
1124 devices_list = NULL;
1125 info_devices = xmalloc(sizeof(struct mdinfo) *
1126 (okcnt + sparecnt));
1127 for (count = 0; count < okcnt + sparecnt; count++) {
1128 info_devices[count] = devices[count].i;
1129 info_devices[count].next = devices_list;
1130 devices_list = &info_devices[count];
1131 }
1132 if (st->ss->validate_container(devices_list))
1133 pr_err("Mismatch detected!\n");
1134 free(info_devices);
1135 }
1136
1137 st->ss->free_super(st);
1138 sysfs_uevent(content, "change");
1139 if (err_ok && okcnt < (unsigned)content->array.raid_disks)
1140 /* Was partial, is still partial, so signal an error
1141 * to ensure we don't retry */
1142 return 1;
1143 return 0;
1144 }
1145
1146 /* Get number of in-sync devices according to the superblock.
1147 * We must have this number to start the array without -s or -R
1148 */
1149 req_cnt = content->array.working_disks;
1150
1151 if (c->runstop == 1 ||
1152 (c->runstop <= 0 &&
1153 (enough(content->array.level, content->array.raid_disks,
1154 content->array.layout, clean, avail) &&
1155 (okcnt + rebuilding_cnt >= req_cnt || start_partial_ok)))) {
1156 /* This array is good-to-go.
1157 * If a reshape is in progress then we might need to
1158 * continue monitoring it. In that case we start
1159 * it read-only and let the grow code make it writable.
1160 */
1161 int rv;
1162
1163 if (content->reshape_active &&
1164 !(content->reshape_active & RESHAPE_NO_BACKUP) &&
1165 content->delta_disks <= 0) {
1166 if (!c->backup_file) {
1167 pr_err("%s: Need a backup file to complete reshape of this array.\n",
1168 mddev);
1169 pr_err("Please provided one with \"--backup-file=...\"\n");
1170 if (c->update == UOPT_REVERT_RESHAPE)
1171 pr_err("(Don't specify --update=revert-reshape again, that part succeeded.)\n");
1172 return 1;
1173 }
1174 rv = sysfs_set_str(content, NULL,
1175 "array_state", "readonly");
1176 if (rv == 0)
1177 rv = Grow_continue(mdfd, st, content, 0, c);
1178 } else if (c->readonly &&
1179 sysfs_attribute_available(content, NULL,
1180 "array_state")) {
1181 rv = sysfs_set_str(content, NULL,
1182 "array_state", "readonly");
1183 } else
1184 rv = ioctl(mdfd, RUN_ARRAY, NULL);
1185 reopen_mddev(mdfd); /* drop O_EXCL */
1186 if (rv == 0) {
1187 sysfs_rules_apply(mddev, content);
1188 if (c->verbose >= 0) {
1189 pr_info("%s has been started with %d drive%s",
1190 mddev, okcnt, okcnt==1?"":"s");
1191 if (okcnt < (unsigned)content->array.raid_disks)
1192 printf(" (out of %d)", content->array.raid_disks);
1193 if (rebuilding_cnt)
1194 printf("%s %d rebuilding", sparecnt?",":" and",
1195 rebuilding_cnt);
1196 if (sparecnt)
1197 printf(" and %d spare%s", sparecnt,
1198 sparecnt == 1 ? "" : "s");
1199 if (content->journal_clean)
1200 printf(" and %d journal", journalcnt);
1201 printf(".\n");
1202 }
1203 if (content->reshape_active &&
1204 is_level456(content->array.level)) {
1205 /* might need to increase the size
1206 * of the stripe cache - default is 256
1207 */
1208 int chunk_size = content->array.chunk_size;
1209
1210 if (content->reshape_active &&
1211 content->new_chunk > chunk_size)
1212 chunk_size = content->new_chunk;
1213 if (256 < 4 * ((chunk_size+4065)/4096)) {
1214 struct mdinfo *sra;
1215
1216 sra = sysfs_read(mdfd, NULL, 0);
1217 if (sra)
1218 sysfs_set_num(sra, NULL,
1219 "stripe_cache_size",
1220 (4 * chunk_size / 4096) + 1);
1221 sysfs_free(sra);
1222 }
1223 }
1224 if (okcnt < (unsigned)content->array.raid_disks) {
1225 /* If any devices did not get added
1226 * because the kernel rejected them based
1227 * on event count, try adding them
1228 * again providing the action policy is
1229 * 're-add' or greater. The bitmap
1230 * might allow them to be included, or
1231 * they will become spares.
1232 */
1233 for (i = 0; i < bestcnt; i++) {
1234 int j = best[i];
1235 if (j >= 0 && !devices[j].uptodate) {
1236 if (!disk_action_allows(&devices[j].i, st->ss->name, act_re_add))
1237 continue;
1238 rv = add_disk(mdfd, st, content,
1239 &devices[j].i);
1240 if (rv == 0 && c->verbose >= 0)
1241 pr_err("%s has been re-added.\n",
1242 devices[j].devname);
1243 }
1244 }
1245 }
1246 if (content->array.level == 6 &&
1247 okcnt + 1 == (unsigned)content->array.raid_disks &&
1248 was_forced) {
1249 struct mdinfo *sra;
1250
1251 sra = sysfs_read(mdfd, NULL, 0);
1252 if (sra)
1253 sysfs_set_str(sra, NULL,
1254 "sync_action", "repair");
1255 sysfs_free(sra);
1256 }
1257 return 0;
1258 }
1259 pr_err("failed to RUN_ARRAY %s: %s\n", mddev, strerror(errno));
1260 if (errno == 524 /* ENOTSUP */ &&
1261 content->array.level == 0 && content->array.layout == 0)
1262 cont_err("Please use --update=layout-original or --update=layout-alternate\n");
1263
1264 if (!enough(content->array.level, content->array.raid_disks,
1265 content->array.layout, 1, avail))
1266 pr_err("Not enough devices to start the array.\n");
1267 else if (!enough(content->array.level,
1268 content->array.raid_disks,
1269 content->array.layout, clean, avail))
1270 pr_err("Not enough devices to start the array while not clean - consider --force.\n");
1271
1272 return 1;
1273 }
1274 if (c->runstop == -1) {
1275 pr_err("%s assembled from %d drive%s",
1276 mddev, okcnt, okcnt == 1 ? "" : "s");
1277 if (okcnt != (unsigned)content->array.raid_disks)
1278 fprintf(stderr, " (out of %d)",
1279 content->array.raid_disks);
1280 fprintf(stderr, ", but not started.\n");
1281 return 2;
1282 }
1283 if (c->verbose >= -1) {
1284 pr_err("%s assembled from %d drive%s",
1285 mddev, okcnt, okcnt == 1 ? "" : "s");
1286 if (rebuilding_cnt)
1287 fprintf(stderr, "%s %d rebuilding",
1288 sparecnt ? "," : " and", rebuilding_cnt);
1289 if (sparecnt)
1290 fprintf(stderr, " and %d spare%s", sparecnt,
1291 sparecnt == 1 ? "" : "s");
1292 if (!enough(content->array.level, content->array.raid_disks,
1293 content->array.layout, 1, avail))
1294 fprintf(stderr, " - not enough to start the array.\n");
1295 else if (!enough(content->array.level,
1296 content->array.raid_disks,
1297 content->array.layout, clean, avail))
1298 fprintf(stderr, " - not enough to start the array while not clean - consider --force.\n");
1299 else {
1300 if (req_cnt == (unsigned)content->array.raid_disks)
1301 fprintf(stderr, " - need all %d to start it",
1302 req_cnt);
1303 else
1304 fprintf(stderr, " - need %d to start", req_cnt);
1305 fprintf(stderr, " (use --run to insist).\n");
1306 }
1307 }
1308 return 1;
1309 }
1310
1311 int Assemble(struct supertype *st, char *mddev,
1312 struct mddev_ident *ident,
1313 struct mddev_dev *devlist,
1314 struct context *c)
1315 {
1316 /*
1317 * The task of Assemble is to find a collection of
1318 * devices that should (according to their superblocks)
1319 * form an array, and to give this collection to the MD driver.
1320 * In Linux-2.4 and later, this involves submitting a
1321 * SET_ARRAY_INFO ioctl with no arg - to prepare
1322 * the array - and then submit a number of
1323 * ADD_NEW_DISK ioctls to add disks into
1324 * the array. Finally RUN_ARRAY might
1325 * be submitted to start the array.
1326 *
1327 * Much of the work of Assemble is in finding and/or
1328 * checking the disks to make sure they look right.
1329 *
1330 * If mddev is not set, then scan must be set and we
1331 * read through the config file for dev+uuid mapping
1332 * We recurse, setting mddev, for each device that
1333 * - isn't running
1334 * - has a valid uuid (or any uuid if !uuidset)
1335 *
1336 * If mddev is set, we try to determine state of md.
1337 * check version - must be at least 0.90.0
1338 * check kernel version. must be at least 2.4.
1339 * If not, we can possibly fall back on START_ARRAY
1340 * Try to GET_ARRAY_INFO.
1341 * If possible, give up
1342 * If not, try to STOP_ARRAY just to make sure
1343 *
1344 * If !uuidset and scan, look in conf-file for uuid
1345 * If not found, give up
1346 * If !devlist and scan and uuidset, get list of devs from conf-file
1347 *
1348 * For each device:
1349 * Check superblock - discard if bad
1350 * Check uuid (set if we don't have one) - discard if no match
1351 * Check superblock similarity if we have a superblock - discard if different
1352 * Record events, devicenum
1353 * This should give us a list of devices for the array
1354 * We should collect the most recent event number
1355 *
1356 * Count disks with recent enough event count
1357 * While force && !enough disks
1358 * Choose newest rejected disks, update event count
1359 * mark clean and rewrite superblock
1360 * If recent kernel:
1361 * SET_ARRAY_INFO
1362 * foreach device with recent events : ADD_NEW_DISK
1363 * if runstop == 1 || "enough" disks and runstop==0 -> RUN_ARRAY
1364 * If old kernel:
1365 * Check the device numbers in superblock are right
1366 * update superblock if any changes
1367 * START_ARRAY
1368 *
1369 */
1370 int rv = -1;
1371 int mdfd = -1;
1372 int clean;
1373 int auto_assem = (mddev == NULL && !ident->uuid_set &&
1374 ident->super_minor == UnSet && ident->name[0] == 0 &&
1375 (ident->container == NULL || ident->member == NULL));
1376 struct devs *devices = NULL;
1377 char *devmap;
1378 int *best = NULL; /* indexed by raid_disk */
1379 int bestcnt = 0;
1380 int devcnt;
1381 unsigned int okcnt, sparecnt, rebuilding_cnt, replcnt, journalcnt;
1382 int journal_clean = 0;
1383 int i;
1384 int was_forced = 0;
1385 int most_recent = 0;
1386 int chosen_drive;
1387 int change = 0;
1388 int inargv = 0;
1389 int start_partial_ok = (c->runstop >= 0) &&
1390 (c->force || devlist==NULL || auto_assem);
1391 int num_devs;
1392 struct mddev_dev *tmpdev;
1393 struct mdinfo info;
1394 struct mdinfo *content = NULL;
1395 struct mdinfo *pre_exist = NULL;
1396 char *avail;
1397 char *name = NULL;
1398 char chosen_name[1024];
1399 struct map_ent *map = NULL;
1400 struct map_ent *mp;
1401
1402 /*
1403 * If any subdevs are listed, then any that don't
1404 * match ident are discarded. Remainder must all match and
1405 * become the array.
1406 * If no subdevs, then we scan all devices in the config file, but
1407 * there must be something in the identity
1408 */
1409
1410 if (!devlist &&
1411 ident->uuid_set == 0 &&
1412 (ident->super_minor < 0 || ident->super_minor == UnSet) &&
1413 ident->name[0] == 0 &&
1414 (ident->container == NULL || ident->member == NULL) &&
1415 ident->devices == NULL) {
1416 pr_err("No identity information available for %s - cannot assemble.\n",
1417 mddev ? mddev : "further assembly");
1418 return 1;
1419 }
1420
1421 if (devlist == NULL)
1422 devlist = conf_get_devs();
1423 else if (mddev)
1424 inargv = 1;
1425
1426 try_again:
1427 /* We come back here when doing auto-assembly and attempting some
1428 * set of devices failed. Those are now marked as ->used==2 and
1429 * we ignore them and try again
1430 */
1431 if (!st && ident->st)
1432 st = ident->st;
1433 if (c->verbose>0)
1434 pr_err("looking for devices for %s\n",
1435 mddev ? mddev : "further assembly");
1436
1437 content = &info;
1438 if (st && c->force)
1439 st->ignore_hw_compat = 1;
1440 num_devs = select_devices(devlist, ident, &st, &content, c,
1441 inargv, auto_assem);
1442 if (num_devs < 0)
1443 return 1;
1444
1445 if (!st || !st->sb || !content)
1446 return 2;
1447
1448 /* We have a full set of devices - we now need to find the
1449 * array device.
1450 * However there is a risk that we are racing with "mdadm -I"
1451 * and the array is already partially assembled - we will have
1452 * rejected any devices already in this address.
1453 * So we take a lock on the map file - to prevent further races -
1454 * and look for the uuid in there. If found and the array is
1455 * active, we abort. If found and the array is not active
1456 * we commit to that md device and add all the contained devices
1457 * to our list. We flag them so that we don't try to re-add,
1458 * but can remove if they turn out to not be wanted.
1459 */
1460 if (map_lock(&map))
1461 pr_err("failed to get exclusive lock on mapfile - continue anyway...\n");
1462 if (c->update == UOPT_UUID)
1463 mp = NULL;
1464 else
1465 mp = map_by_uuid(&map, content->uuid);
1466 if (mp) {
1467 struct mdinfo *dv;
1468 pre_exist = sysfs_read(-1, mp->devnm, GET_LEVEL|GET_DEVS);
1469 if (!pre_exist)
1470 goto out;
1471
1472 /* array already exists. */
1473 if (pre_exist->array.level != UnSet) {
1474 pr_err("Found some drive for an array that is already active: %s\n",
1475 mp->path);
1476 pr_err("giving up.\n");
1477 goto out;
1478 }
1479 for (dv = pre_exist->devs; dv; dv = dv->next) {
1480 /* We want to add this device to our list,
1481 * but it could already be there if "mdadm -I"
1482 * started *after* we checked for O_EXCL.
1483 * If we add it to the top of the list
1484 * it will be preferred over later copies.
1485 */
1486 struct mddev_dev *newdev;
1487 char *devname = map_dev(dv->disk.major,
1488 dv->disk.minor,
1489 0);
1490 if (!devname)
1491 continue;
1492 newdev = xmalloc(sizeof(*newdev));
1493 newdev->devname = devname;
1494 newdev->disposition = 'I';
1495 newdev->used = 1;
1496 newdev->next = devlist;
1497 devlist = newdev;
1498 num_devs++;
1499 }
1500 strcpy(chosen_name, mp->path);
1501 if (c->verbose > 0 || mddev == NULL ||
1502 strcmp(mddev, chosen_name) != 0)
1503 pr_err("Merging with already-assembled %s\n",
1504 chosen_name);
1505 mdfd = open_dev_excl(mp->devnm);
1506 } else {
1507 int trustworthy = FOREIGN;
1508 name = content->name;
1509 switch (st->ss->match_home(st, c->homehost)
1510 ?: st->ss->match_home(st, "any")) {
1511 case 1:
1512 trustworthy = LOCAL;
1513 name = strchr(content->name, ':');
1514 if (name)
1515 name++;
1516 else
1517 name = content->name;
1518 break;
1519 }
1520 if (mddev && map_by_name(&map, mddev) != NULL) {
1521 pr_err("Cannot create device with %s because is in use\n", mddev);
1522 goto out;
1523 }
1524 if (!auto_assem)
1525 /* If the array is listed in mdadm.conf or on
1526 * command line, then we trust the name
1527 * even if the array doesn't look local
1528 */
1529 trustworthy = LOCAL;
1530
1531 if (!name[0] && is_container(content->array.level)) {
1532 name = content->text_version;
1533 trustworthy = METADATA;
1534 }
1535
1536 if (name[0] && trustworthy != LOCAL &&
1537 ! c->require_homehost &&
1538 conf_name_is_free(name))
1539 trustworthy = LOCAL;
1540
1541 if (trustworthy == LOCAL &&
1542 strchr(name, ':'))
1543 /* Ignore 'host:' prefix of name */
1544 name = strchr(name, ':')+1;
1545
1546 mdfd = create_mddev(mddev, name, trustworthy, chosen_name, 0);
1547 }
1548 if (mdfd < 0) {
1549 st->ss->free_super(st);
1550 if (auto_assem)
1551 goto try_again;
1552 goto out;
1553 }
1554 mddev = chosen_name;
1555 if (pre_exist == NULL) {
1556 if (mddev_busy(fd2devnm(mdfd))) {
1557 pr_err("%s already active, cannot restart it!\n",
1558 mddev);
1559 for (tmpdev = devlist ;
1560 tmpdev && tmpdev->used != 1;
1561 tmpdev = tmpdev->next)
1562 ;
1563 if (tmpdev && auto_assem)
1564 pr_err("%s needed for %s...\n",
1565 mddev, tmpdev->devname);
1566 close(mdfd);
1567 mdfd = -3;
1568 st->ss->free_super(st);
1569 if (auto_assem)
1570 goto try_again;
1571 goto out;
1572 }
1573 }
1574
1575 if (content != &info) {
1576 /* This is a member of a container. Try starting the array. */
1577 int err;
1578 err = assemble_container_content(st, mdfd, content, c,
1579 chosen_name, NULL);
1580 close(mdfd);
1581 sysfs_free(pre_exist);
1582 return err;
1583 }
1584
1585 /* Ok, no bad inconsistancy, we can try updating etc */
1586 devices = xcalloc(num_devs, sizeof(*devices));
1587 devmap = xcalloc(num_devs, content->array.raid_disks);
1588 devcnt = load_devices(devices, devmap, ident, &st, devlist,
1589 c, content, mdfd, mddev,
1590 &most_recent, &bestcnt, &best, inargv);
1591 if (devcnt < 0) {
1592 mdfd = -3;
1593 /*
1594 * devices is already freed in load_devices, so set devices
1595 * to NULL to avoid double free devices.
1596 */
1597 devices = NULL;
1598 goto out;
1599 }
1600
1601 if (devcnt == 0) {
1602 pr_err("no devices found for %s\n",
1603 mddev);
1604 if (st)
1605 st->ss->free_super(st);
1606 free(devmap);
1607 goto out;
1608 }
1609
1610 if (c->update == UOPT_BYTEORDER)
1611 st->minor_version = 90;
1612
1613 st->ss->getinfo_super(st, content, NULL);
1614 clean = content->array.state & 1;
1615
1616 /* now we have some devices that might be suitable.
1617 * I wonder how many
1618 */
1619 avail = xcalloc(content->array.raid_disks, 1);
1620 okcnt = 0;
1621 replcnt = 0;
1622 sparecnt=0;
1623 journalcnt=0;
1624 rebuilding_cnt=0;
1625 for (i=0; i< bestcnt; i++) {
1626 int j = best[i];
1627 int event_margin = 1; /* always allow a difference of '1'
1628 * like the kernel does
1629 */
1630 if (j < 0) continue;
1631 /* note: we ignore error flags in multipath arrays
1632 * as they don't make sense
1633 */
1634 if (content->array.level != LEVEL_MULTIPATH) {
1635 if (devices[j].i.disk.state & (1<<MD_DISK_JOURNAL)) {
1636 if (content->journal_device_required)
1637 journalcnt++;
1638 else /* unexpected journal, mark as faulty */
1639 devices[j].i.disk.state |= (1<<MD_DISK_FAULTY);
1640 } else if (!(devices[j].i.disk.state & (1<<MD_DISK_ACTIVE))) {
1641 if (!(devices[j].i.disk.state
1642 & (1<<MD_DISK_FAULTY))) {
1643 devices[j].uptodate = 1;
1644 sparecnt++;
1645 }
1646 continue;
1647 }
1648 }
1649 /* If this device thinks that 'most_recent' has failed, then
1650 * we must reject this device.
1651 */
1652 if (j != most_recent && !c->force &&
1653 content->array.raid_disks > 0 &&
1654 devices[most_recent].i.disk.raid_disk >= 0 &&
1655 devmap[j * content->array.raid_disks + devices[most_recent].i.disk.raid_disk] == 0) {
1656 if (c->verbose > -1)
1657 pr_err("ignoring %s as it reports %s as failed\n",
1658 devices[j].devname, devices[most_recent].devname);
1659 best[i] = -1;
1660 continue;
1661 }
1662 /* Require event counter to be same as, or just less than,
1663 * most recent. If it is bigger, it must be a stray spare and
1664 * should be ignored.
1665 */
1666 if (devices[j].i.events+event_margin >=
1667 devices[most_recent].i.events &&
1668 devices[j].i.events <=
1669 devices[most_recent].i.events
1670 ) {
1671 devices[j].uptodate = 1;
1672 if (devices[j].i.disk.state & (1<<MD_DISK_JOURNAL))
1673 journal_clean = 1;
1674 if (i < content->array.raid_disks * 2) {
1675 if (devices[j].i.recovery_start == MaxSector ||
1676 (content->reshape_active &&
1677 i >= content->array.raid_disks - content->delta_disks)) {
1678 if (!avail[i/2]) {
1679 okcnt++;
1680 avail[i/2]=1;
1681 } else
1682 replcnt++;
1683 } else
1684 rebuilding_cnt++;
1685 } else if (devices[j].i.disk.raid_disk != MD_DISK_ROLE_JOURNAL)
1686 sparecnt++;
1687 }
1688 }
1689 free(devmap);
1690 if (c->force) {
1691 int force_ok = force_array(content, devices, best, bestcnt,
1692 avail, most_recent, st, c);
1693 okcnt += force_ok;
1694 if (force_ok)
1695 was_forced = 1;
1696 }
1697 /* Now we want to look at the superblock which the kernel will base things on
1698 * and compare the devices that we think are working with the devices that the
1699 * superblock thinks are working.
1700 * If there are differences and --force is given, then update this chosen
1701 * superblock.
1702 */
1703 chosen_drive = -1;
1704 st->ss->free_super(st);
1705 for (i=0; chosen_drive < 0 && i<bestcnt; i+=2) {
1706 int j = best[i];
1707 int fd;
1708
1709 if (j<0)
1710 continue;
1711 if (!devices[j].uptodate)
1712 continue;
1713 if (devices[j].i.events < devices[most_recent].i.events)
1714 continue;
1715 chosen_drive = j;
1716 if ((fd=dev_open(devices[j].devname,
1717 devices[j].included ? O_RDONLY
1718 : (O_RDONLY|O_EXCL)))< 0) {
1719 pr_err("Cannot open %s: %s\n",
1720 devices[j].devname, strerror(errno));
1721 free(avail);
1722 goto out;
1723 }
1724 if (st->ss->load_super(st,fd, NULL)) {
1725 close(fd);
1726 pr_err("RAID superblock has disappeared from %s\n",
1727 devices[j].devname);
1728 free(avail);
1729 goto out;
1730 }
1731 close(fd);
1732 }
1733 if (st->sb == NULL) {
1734 pr_err("No suitable drives found for %s\n", mddev);
1735 free(avail);
1736 goto out;
1737 }
1738 st->ss->getinfo_super(st, content, NULL);
1739 if (sysfs_init(content, mdfd, NULL)) {
1740 pr_err("Unable to initialize sysfs\n");
1741 free(avail);
1742 goto out;
1743 }
1744
1745 /* after reload context, store journal_clean in context */
1746 content->journal_clean = journal_clean;
1747 for (i=0; i<bestcnt; i++) {
1748 int j = best[i];
1749 unsigned int desired_state;
1750
1751 if (j < 0)
1752 continue;
1753 if (devices[j].i.disk.raid_disk == MD_DISK_ROLE_JOURNAL)
1754 desired_state = (1<<MD_DISK_JOURNAL);
1755 else if (i >= content->array.raid_disks * 2)
1756 desired_state = 0;
1757 else if (i & 1)
1758 desired_state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_REPLACEMENT);
1759 else
1760 desired_state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC);
1761
1762 desired_state |= devices[j].i.disk.state & ((1<<MD_DISK_FAILFAST) |
1763 (1<<MD_DISK_WRITEMOSTLY));
1764
1765 if (!devices[j].uptodate)
1766 continue;
1767
1768 devices[j].i.disk.state = desired_state;
1769 if (!(devices[j].i.array.state & 1))
1770 clean = 0;
1771
1772 if (st->ss->update_super(st, &devices[j].i, UOPT_SPEC_ASSEMBLE, NULL,
1773 c->verbose, 0, NULL)) {
1774 if (c->force) {
1775 if (c->verbose >= 0)
1776 pr_err("clearing FAULTY flag for device %d in %s for %s\n",
1777 j, mddev, devices[j].devname);
1778 change = 1;
1779 } else {
1780 if (c->verbose >= -1)
1781 pr_err("device %d in %s has wrong state in superblock, but %s seems ok\n",
1782 i, mddev, devices[j].devname);
1783 }
1784 }
1785 }
1786 if (c->force && !clean && !is_container(content->array.level) &&
1787 !enough(content->array.level, content->array.raid_disks,
1788 content->array.layout, clean, avail)) {
1789 change += st->ss->update_super(st, content, UOPT_SPEC_FORCE_ARRAY,
1790 devices[chosen_drive].devname, c->verbose,
1791 0, NULL);
1792 was_forced = 1;
1793 clean = 1;
1794 }
1795
1796 if (change) {
1797 int fd;
1798 fd = dev_open(devices[chosen_drive].devname,
1799 devices[chosen_drive].included ?
1800 O_RDWR : (O_RDWR|O_EXCL));
1801 if (fd < 0) {
1802 pr_err("Could not open %s for write - cannot Assemble array.\n",
1803 devices[chosen_drive].devname);
1804 free(avail);
1805 goto out;
1806 }
1807 if (st->ss->store_super(st, fd)) {
1808 close(fd);
1809 pr_err("Could not re-write superblock on %s\n",
1810 devices[chosen_drive].devname);
1811 free(avail);
1812 goto out;
1813 }
1814 if (c->verbose >= 0)
1815 pr_err("Marking array %s as 'clean'\n",
1816 mddev);
1817 close(fd);
1818 }
1819
1820 /* If we are in the middle of a reshape we may need to restore saved data
1821 * that was moved aside due to the reshape overwriting live data
1822 * The code of doing this lives in Grow.c
1823 */
1824 if (content->reshape_active &&
1825 !(content->reshape_active & RESHAPE_NO_BACKUP)) {
1826 int err = 0;
1827 int *fdlist = xmalloc(sizeof(int)* bestcnt);
1828 if (c->verbose > 0)
1829 pr_err("%s has an active reshape - checking if critical section needs to be restored\n",
1830 chosen_name);
1831 if (!c->backup_file)
1832 c->backup_file = locate_backup(content->sys_name);
1833 enable_fds(bestcnt/2);
1834 for (i = 0; i < bestcnt/2; i++) {
1835 int j = best[i*2];
1836 if (j >= 0) {
1837 fdlist[i] = dev_open(devices[j].devname,
1838 devices[j].included
1839 ? O_RDWR : (O_RDWR|O_EXCL));
1840 if (fdlist[i] < 0) {
1841 pr_err("Could not open %s for write - cannot Assemble array.\n",
1842 devices[j].devname);
1843 err = 1;
1844 break;
1845 }
1846 } else
1847 fdlist[i] = -1;
1848 }
1849 if (!err) {
1850 if (st->ss->external && st->ss->recover_backup)
1851 err = st->ss->recover_backup(st, content);
1852 else
1853 err = Grow_restart(st, content, fdlist, bestcnt/2,
1854 c->backup_file, c->verbose > 0);
1855 if (err && c->invalid_backup) {
1856 if (c->verbose > 0)
1857 pr_err("continuing without restoring backup\n");
1858 err = 0;
1859 }
1860 }
1861 while (i>0) {
1862 i--;
1863 if (fdlist[i]>=0) close(fdlist[i]);
1864 }
1865 free(fdlist);
1866 if (err) {
1867 pr_err("Failed to restore critical section for reshape, sorry.\n");
1868 if (c->backup_file == NULL)
1869 cont_err("Possibly you needed to specify the --backup-file\n");
1870 free(avail);
1871 goto out;
1872 }
1873 }
1874
1875 /* Almost ready to actually *do* something */
1876 /* First, fill in the map, so that udev can find our name
1877 * as soon as we become active.
1878 */
1879 if (c->update == UOPT_METADATA) {
1880 content->array.major_version = 1;
1881 content->array.minor_version = 0;
1882 strcpy(content->text_version, "1.0");
1883 }
1884
1885 map_update(&map, fd2devnm(mdfd), content->text_version,
1886 content->uuid, chosen_name);
1887
1888 rv = start_array(mdfd, mddev, content,
1889 st, ident, best, bestcnt,
1890 chosen_drive, devices, okcnt, sparecnt,
1891 rebuilding_cnt, journalcnt,
1892 c,
1893 clean, avail, start_partial_ok,
1894 pre_exist != NULL,
1895 was_forced);
1896 if (rv == 1 && !pre_exist)
1897 ioctl(mdfd, STOP_ARRAY, NULL);
1898 free(devices);
1899 free(avail);
1900 out:
1901 map_unlock(&map);
1902 if (rv == 0) {
1903 wait_for(chosen_name, mdfd);
1904 close(mdfd);
1905 if (auto_assem) {
1906 int usecs = 1;
1907 /* There is a nasty race with 'mdadm --monitor'.
1908 * If it opens this device before we close it,
1909 * it gets an incomplete open on which IO
1910 * doesn't work and the capacity is
1911 * wrong.
1912 * If we reopen (to check for layered devices)
1913 * before --monitor closes, we loose.
1914 *
1915 * So: wait upto 1 second for there to be
1916 * a non-zero capacity.
1917 */
1918 while (usecs < 1000) {
1919 mdfd = open(mddev, O_RDONLY);
1920 if (mdfd >= 0) {
1921 unsigned long long size;
1922 if (get_dev_size(mdfd, NULL, &size) &&
1923 size > 0)
1924 break;
1925 close(mdfd);
1926 }
1927 sleep_for(0, USEC_TO_NSEC(usecs), true);
1928 usecs <<= 1;
1929 }
1930 }
1931 } else if (mdfd >= 0)
1932 close(mdfd);
1933
1934 free(best);
1935 sysfs_free(pre_exist);
1936
1937 /* '2' means 'OK, but not started yet' */
1938 if (rv == -1) {
1939 free(devices);
1940 return 1;
1941 }
1942 close(mdfd);
1943 return rv == 2 ? 0 : rv;
1944 }
1945
1946 int assemble_container_content(struct supertype *st, int mdfd,
1947 struct mdinfo *content, struct context *c,
1948 char *chosen_name, int *result)
1949 {
1950 struct mdinfo *dev, *sra, *dev2;
1951 struct assembly_array_info array = {chosen_name, 0, 0, 0};
1952 int old_raid_disks;
1953 int start_reshape;
1954 char *avail;
1955 int err;
1956 int is_clean, all_disks;
1957 bool is_raid456;
1958
1959 if (sysfs_init(content, mdfd, NULL)) {
1960 pr_err("Unable to initialize sysfs\n");
1961 return 1;
1962 }
1963
1964 sra = sysfs_read(mdfd, NULL, GET_VERSION|GET_DEVS);
1965 if (sra == NULL) {
1966 pr_err("Failed to read sysfs parameters\n");
1967 return 1;
1968 }
1969
1970 /* Fill sysfs properties only if they are not set. Determine it by checking text_version
1971 * and ignoring special character on the first place.
1972 */
1973 if (strcmp(sra->text_version + 1, content->text_version + 1) != 0) {
1974 if (sysfs_set_array(content) != 0) {
1975 sysfs_free(sra);
1976 return 1;
1977 }
1978 }
1979
1980 /* There are two types of reshape: container wide or sub-array specific
1981 * Check if metadata requests blocking container wide reshapes
1982 */
1983 start_reshape = (content->reshape_active &&
1984 !((content->reshape_active == CONTAINER_RESHAPE) &&
1985 (content->array.state & (1<<MD_SB_BLOCK_CONTAINER_RESHAPE))));
1986
1987 /* Block subarray here if it is under reshape now
1988 * Do not allow for any changes in this array
1989 */
1990 if (st->ss->external && content->recovery_blocked && start_reshape)
1991 block_subarray(content);
1992
1993 for (dev2 = sra->devs; dev2; dev2 = dev2->next) {
1994 for (dev = content->devs; dev; dev = dev->next)
1995 if (dev2->disk.major == dev->disk.major &&
1996 dev2->disk.minor == dev->disk.minor)
1997 break;
1998 if (dev)
1999 continue;
2000 /* Don't want this one any more */
2001 if (sysfs_set_str(sra, dev2, "slot", STR_COMMON_NONE) < 0 && errno == EBUSY) {
2002 pr_err("Cannot remove old device %s: not updating %s\n", dev2->sys_name, sra->sys_name);
2003 sysfs_free(sra);
2004 return 1;
2005 }
2006 sysfs_set_str(sra, dev2, "state", "remove");
2007 }
2008 old_raid_disks = content->array.raid_disks - content->delta_disks;
2009 avail = xcalloc(content->array.raid_disks, 1);
2010 for (dev = content->devs; dev; dev = dev->next) {
2011 if (dev->disk.raid_disk >= 0)
2012 avail[dev->disk.raid_disk] = 1;
2013 if (sysfs_add_disk(content, dev, 1) == 0) {
2014 if (dev->disk.raid_disk >= old_raid_disks &&
2015 content->reshape_active)
2016 array.exp_cnt++;
2017 else
2018 array.new_cnt++;
2019 } else if (errno == EEXIST)
2020 array.preexist_cnt++;
2021 }
2022 sysfs_free(sra);
2023
2024 all_disks = array.new_cnt + array.exp_cnt + array.preexist_cnt;
2025
2026 map_update(NULL, fd2devnm(mdfd), content->text_version,
2027 content->uuid, chosen_name);
2028
2029 if (content->consistency_policy == CONSISTENCY_POLICY_PPL &&
2030 st->ss->validate_ppl) {
2031 content->array.state |= 1;
2032 err = 0;
2033
2034 for (dev = content->devs; dev; dev = dev->next) {
2035 int dfd;
2036 char *devpath;
2037 int ret;
2038
2039 ret = st->ss->validate_ppl(st, content, dev);
2040 if (ret == 0)
2041 continue;
2042
2043 if (ret < 0) {
2044 err = 1;
2045 break;
2046 }
2047
2048 if (!c->force) {
2049 pr_err("%s contains invalid PPL - consider --force or --update-subarray with --update=no-ppl\n",
2050 chosen_name);
2051 content->array.state &= ~1;
2052 avail[dev->disk.raid_disk] = 0;
2053 break;
2054 }
2055
2056 /* have --force - overwrite the invalid ppl */
2057 devpath = map_dev(dev->disk.major, dev->disk.minor, 0);
2058 dfd = dev_open(devpath, O_RDWR);
2059 if (dfd < 0) {
2060 pr_err("Failed to open %s\n", devpath);
2061 err = 1;
2062 break;
2063 }
2064
2065 err = st->ss->write_init_ppl(st, content, dfd);
2066 close(dfd);
2067
2068 if (err)
2069 break;
2070 }
2071
2072 if (err) {
2073 free(avail);
2074 return err;
2075 }
2076 } else if (c->force) {
2077 /* Set the array as 'clean' so that we can proceed with starting
2078 * it even if we don't have all devices. Mdmon doesn't care
2079 * if the dirty flag is set in metadata, it will start managing
2080 * it anyway.
2081 * This is really important for raid456 (RWH case), other levels
2082 * are started anyway.
2083 */
2084 content->array.state |= 1;
2085 }
2086
2087 is_raid456 = is_level456(content->array.level);
2088 is_clean = content->array.state & 1;
2089
2090 if (enough(content->array.level, content->array.raid_disks,
2091 content->array.layout, is_clean, avail) == 0) {
2092 set_array_assembly_status(c, result, INCR_NO, &array);
2093
2094 if (c->verbose >= 0 && is_raid456 && !is_clean)
2095 pr_err("Consider --force to start dirty degraded array\n");
2096
2097 free(avail);
2098 return 1;
2099 }
2100 free(avail);
2101
2102 if (c->runstop <= 0 && all_disks < content->array.working_disks) {
2103
2104 set_array_assembly_status(c, result, INCR_UNSAFE, &array);
2105
2106 if (c->verbose >= 0 && c->force)
2107 pr_err("Consider --run to start array as degraded.\n");
2108 return 1;
2109 }
2110
2111 if (is_raid456 && content->resync_start != MaxSector && c->force &&
2112 all_disks < content->array.raid_disks) {
2113
2114 content->resync_start = MaxSector;
2115 err = sysfs_set_num(content, NULL, "resync_start", MaxSector);
2116 if (err)
2117 return 1;
2118
2119 pr_err("%s array state forced to clean. It may cause data corruption.\n",
2120 chosen_name);
2121 }
2122
2123 /*
2124 * Before activating the array, perform extra steps required
2125 * to configure the internal write-intent bitmap.
2126 */
2127 if (content->consistency_policy == CONSISTENCY_POLICY_BITMAP &&
2128 st->ss->set_bitmap)
2129 st->ss->set_bitmap(st, content);
2130
2131 if (start_reshape) {
2132 int spare = content->array.raid_disks + array.exp_cnt;
2133 if (restore_backup(st, content,
2134 array.new_cnt,
2135 spare, &c->backup_file, c->verbose) == 1)
2136 return 1;
2137
2138 if (content->reshape_progress == 0) {
2139 /* If reshape progress is 0 - we are assembling the
2140 * array that was stopped, before reshape has started.
2141 * Array needs to be started as active, Grow_continue()
2142 * will start the reshape.
2143 */
2144 sysfs_set_num(content, NULL, "reshape_position",
2145 MaxSector);
2146 err = sysfs_set_str(content, NULL,
2147 "array_state", "active");
2148 sysfs_set_num(content, NULL, "reshape_position", 0);
2149 } else {
2150 err = sysfs_set_str(content, NULL,
2151 "array_state", "readonly");
2152 }
2153
2154 if (err)
2155 return 1;
2156
2157 if (st->ss->external) {
2158 if (!mdmon_running(st->container_devnm))
2159 start_mdmon(st->container_devnm);
2160 ping_monitor(st->container_devnm);
2161 if (wait_for_mdmon(st->container_devnm) == MDADM_STATUS_SUCCESS &&
2162 !st->update_tail)
2163 st->update_tail = &st->updates;
2164 }
2165
2166 err = Grow_continue(mdfd, st, content, 0, c);
2167 } else switch(content->array.level) {
2168 case LEVEL_LINEAR:
2169 case LEVEL_MULTIPATH:
2170 case 0:
2171 err = sysfs_set_str(content, NULL, "array_state",
2172 c->readonly ? "readonly" : "active");
2173 break;
2174 default:
2175 err = sysfs_set_str(content, NULL, "array_state",
2176 "readonly");
2177 /* start mdmon if needed. */
2178 if (!err) {
2179 if (!mdmon_running(st->container_devnm))
2180 start_mdmon(st->container_devnm);
2181 ping_monitor(st->container_devnm);
2182 }
2183 break;
2184 }
2185 if (!err)
2186 sysfs_set_safemode(content, content->safe_mode_delay);
2187
2188 /* Block subarray here if it is not reshaped now
2189 * It has to be blocked a little later to allow mdmon to switch
2190 * into R/W state
2191 */
2192 if (st->ss->external && content->recovery_blocked &&
2193 !start_reshape)
2194 block_subarray(content);
2195
2196 if (err)
2197 set_array_assembly_status(c, result, INCR_NO, &array);
2198 else {
2199 set_array_assembly_status(c, result, INCR_YES, &array);
2200 wait_for(chosen_name, mdfd);
2201 sysfs_rules_apply(chosen_name, content);
2202 }
2203
2204 return err;
2205 /* FIXME should have an O_EXCL and wait for read-auto */
2206 }