]> git.ipfire.org Git - thirdparty/mdadm.git/blob - Incremental.c
Use load_container in Incremental assembly.
[thirdparty/mdadm.git] / Incremental.c
1 /*
2 * Incremental.c - support --incremental. Part of:
3 * mdadm - manage Linux "md" devices aka RAID arrays.
4 *
5 * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
6 *
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 *
22 * Author: Neil Brown
23 * Email: <neilb@suse.de>
24 * Paper: Neil Brown
25 * Novell Inc
26 * GPO Box Q1283
27 * QVB Post Office, NSW 1230
28 * Australia
29 */
30
31 #include "mdadm.h"
32 #include <dirent.h>
33 #include <ctype.h>
34
35 static int count_active(struct supertype *st, int mdfd, char **availp,
36 struct mdinfo *info);
37 static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
38 int number, __u64 events, int verbose,
39 char *array_name);
40 static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
41 struct supertype *st, int verbose);
42
43 static int Incremental_container(struct supertype *st, char *devname,
44 char *homehost,
45 int verbose, int runstop, int autof);
46
47 static struct mddev_ident_s *search_mdstat(struct supertype *st,
48 struct mdinfo *info,
49 char *devname,
50 int verbose, int *rvp);
51
52 int Incremental(char *devname, int verbose, int runstop,
53 struct supertype *st, char *homehost, int require_homehost,
54 int autof)
55 {
56 /* Add this device to an array, creating the array if necessary
57 * and starting the array if sensible or - if runstop>0 - if possible.
58 *
59 * This has several steps:
60 *
61 * 1/ Check if device is permitted by mdadm.conf, reject if not.
62 * 2/ Find metadata, reject if none appropriate (check
63 * version/name from args)
64 * 3/ Check if there is a match in mdadm.conf
65 * 3a/ if not, check for homehost match. If no match, assemble as
66 * a 'foreign' array.
67 * 4/ Determine device number.
68 * - If in mdadm.conf with std name, use that
69 * - UUID in /var/run/mdadm.map use that
70 * - If name is suggestive, use that. unless in use with different uuid.
71 * - Choose a free, high number.
72 * - Use a partitioned device unless strong suggestion not to.
73 * e.g. auto=md
74 * Don't choose partitioned for containers.
75 * 5/ Find out if array already exists
76 * 5a/ if it does not
77 * - choose a name, from mdadm.conf or 'name' field in array.
78 * - create the array
79 * - add the device
80 * 5b/ if it does
81 * - check one drive in array to make sure metadata is a reasonably
82 * close match. Reject if not (e.g. different type)
83 * - add the device
84 * 6/ Make sure /var/run/mdadm.map contains this array.
85 * 7/ Is there enough devices to possibly start the array?
86 * For a container, this means running Incremental_container.
87 * 7a/ if not, finish with success.
88 * 7b/ if yes,
89 * - read all metadata and arrange devices like -A does
90 * - if number of OK devices match expected, or -R and there are enough,
91 * start the array (auto-readonly).
92 */
93 struct stat stb;
94 struct mdinfo info, dinfo;
95 struct mddev_ident_s *match;
96 char chosen_name[1024];
97 int rv = 1;
98 struct map_ent *mp, *map = NULL;
99 int dfd = -1, mdfd = -1;
100 char *avail;
101 int active_disks;
102 int trustworthy;
103 char *name_to_use;
104 mdu_array_info_t ainf;
105 struct dev_policy *policy = NULL;
106 unsigned long long size;
107
108 struct createinfo *ci = conf_get_create_info();
109
110 if (stat(devname, &stb) < 0) {
111 if (verbose >= 0)
112 fprintf(stderr, Name ": stat failed for %s: %s.\n",
113 devname, strerror(errno));
114 return rv;
115 }
116 if ((stb.st_mode & S_IFMT) != S_IFBLK) {
117 if (verbose >= 0)
118 fprintf(stderr, Name ": %s is not a block device.\n",
119 devname);
120 return rv;
121 }
122 dfd = dev_open(devname, O_RDONLY|O_EXCL);
123 if (dfd < 0) {
124 if (verbose >= 0)
125 fprintf(stderr, Name ": cannot open %s: %s.\n",
126 devname, strerror(errno));
127 return rv;
128 }
129 /* If the device is a container, we do something very different */
130 if (get_dev_size(dfd, devname, &size) == 0)
131 goto out;
132 if (size == 0) {
133 if (!st)
134 st = super_by_fd(dfd, NULL);
135 if (st)
136 rv = st->ss->load_container(st, dfd, NULL);
137
138 close(dfd);
139 if (!rv && st->ss->container_content)
140 return Incremental_container(st, devname, homehost,
141 verbose, runstop, autof);
142
143 fprintf(stderr, Name ": %s is not part of an md array.\n",
144 devname);
145 return rv;
146 }
147
148 /* 1/ Check if device is permitted by mdadm.conf */
149
150 if (!conf_test_dev(devname)) {
151 if (verbose >= 0)
152 fprintf(stderr, Name
153 ": %s not permitted by mdadm.conf.\n",
154 devname);
155 goto out;
156 }
157
158 /* 2/ Find metadata, reject if none appropriate (check
159 * version/name from args) */
160
161 if (fstat(dfd, &stb) < 0) {
162 if (verbose >= 0)
163 fprintf(stderr, Name ": fstat failed for %s: %s.\n",
164 devname, strerror(errno));
165 goto out;
166 }
167 if ((stb.st_mode & S_IFMT) != S_IFBLK) {
168 if (verbose >= 0)
169 fprintf(stderr, Name ": %s is not a block device.\n",
170 devname);
171 goto out;
172 }
173
174 dinfo.disk.major = major(stb.st_rdev);
175 dinfo.disk.minor = minor(stb.st_rdev);
176
177 policy = disk_policy(&dinfo);
178
179 if (st == NULL && (st = guess_super(dfd)) == NULL) {
180 if (verbose >= 0)
181 fprintf(stderr, Name
182 ": no recognisable superblock on %s.\n",
183 devname);
184 rv = try_spare(devname, &dfd, policy, st, verbose);
185 goto out;
186 }
187 if (st->ss->compare_super == NULL ||
188 st->ss->load_super(st, dfd, NULL)) {
189 if (verbose >= 0)
190 fprintf(stderr, Name ": no RAID superblock on %s.\n",
191 devname);
192 rv = try_spare(devname, &dfd, policy, st, verbose);
193 free(st);
194 goto out;
195 }
196 close (dfd); dfd = -1;
197
198 memset(&info, 0, sizeof(info));
199 st->ss->getinfo_super(st, &info, NULL);
200
201 /* 3/ Check if there is a match in mdadm.conf */
202 match = search_mdstat(st, &info, devname, verbose, &rv);
203 if (!match && rv == 2)
204 goto out;
205
206 if (match && match->devname
207 && strcasecmp(match->devname, "<ignore>") == 0) {
208 if (verbose >= 0)
209 fprintf(stderr, Name ": array containing %s is explicitly"
210 " ignored by mdadm.conf\n",
211 devname);
212 goto out;
213 }
214
215 /* 3a/ if not, check for homehost match. If no match, continue
216 * but don't trust the 'name' in the array. Thus a 'random' minor
217 * number will be assigned, and the device name will be based
218 * on that. */
219 if (match)
220 trustworthy = LOCAL;
221 else if (st->ss->match_home(st, homehost) == 1)
222 trustworthy = LOCAL;
223 else if (st->ss->match_home(st, "any") == 1)
224 trustworthy = LOCAL_ANY;
225 else
226 trustworthy = FOREIGN;
227
228
229 if (!match && !conf_test_metadata(st->ss->name, policy,
230 (trustworthy == LOCAL))) {
231 if (verbose >= 1)
232 fprintf(stderr, Name
233 ": %s has metadata type %s for which "
234 "auto-assembly is disabled\n",
235 devname, st->ss->name);
236 goto out;
237 }
238 if (trustworthy == LOCAL_ANY)
239 trustworthy = LOCAL;
240
241 /* There are three possible sources for 'autof': command line,
242 * ARRAY line in mdadm.conf, or CREATE line in mdadm.conf.
243 * ARRAY takes precedence, then command line, then
244 * CREATE.
245 */
246 if (match && match->autof)
247 autof = match->autof;
248 if (autof == 0)
249 autof = ci->autof;
250
251 name_to_use = info.name;
252 if (name_to_use[0] == 0 &&
253 info.array.level == LEVEL_CONTAINER &&
254 trustworthy == LOCAL) {
255 name_to_use = info.text_version;
256 trustworthy = METADATA;
257 }
258 if (name_to_use[0] && trustworthy != LOCAL &&
259 ! require_homehost &&
260 conf_name_is_free(name_to_use))
261 trustworthy = LOCAL;
262
263 /* strip "hostname:" prefix from name if we have decided
264 * to treat it as LOCAL
265 */
266 if (trustworthy == LOCAL && strchr(name_to_use, ':') != NULL)
267 name_to_use = strchr(name_to_use, ':')+1;
268
269 /* 4/ Check if array exists.
270 */
271 if (map_lock(&map))
272 fprintf(stderr, Name ": failed to get exclusive lock on "
273 "mapfile\n");
274 mp = map_by_uuid(&map, info.uuid);
275 if (mp)
276 mdfd = open_dev(mp->devnum);
277 else
278 mdfd = -1;
279
280 if (mdfd < 0) {
281 struct mdinfo *sra;
282
283 /* Couldn't find an existing array, maybe make a new one */
284 mdfd = create_mddev(match ? match->devname : NULL,
285 name_to_use, autof, trustworthy, chosen_name);
286
287 if (mdfd < 0)
288 goto out;
289
290 sysfs_init(&info, mdfd, 0);
291
292 if (set_array_info(mdfd, st, &info) != 0) {
293 fprintf(stderr, Name ": failed to set array info for %s: %s\n",
294 chosen_name, strerror(errno));
295 rv = 2;
296 goto out;
297 }
298
299 dinfo = info;
300 dinfo.disk.major = major(stb.st_rdev);
301 dinfo.disk.minor = minor(stb.st_rdev);
302 if (add_disk(mdfd, st, &info, &dinfo) != 0) {
303 fprintf(stderr, Name ": failed to add %s to %s: %s.\n",
304 devname, chosen_name, strerror(errno));
305 ioctl(mdfd, STOP_ARRAY, 0);
306 rv = 2;
307 goto out;
308 }
309 sra = sysfs_read(mdfd, fd2devnum(mdfd), GET_DEVS);
310 if (!sra || !sra->devs || sra->devs->disk.raid_disk >= 0) {
311 /* It really should be 'none' - must be old buggy
312 * kernel, and mdadm -I may not be able to complete.
313 * So reject it.
314 */
315 ioctl(mdfd, STOP_ARRAY, NULL);
316 fprintf(stderr, Name
317 ": You have an old buggy kernel which cannot support\n"
318 " --incremental reliably. Aborting.\n");
319 sysfs_free(sra);
320 rv = 2;
321 goto out;
322 }
323 info.array.working_disks = 1;
324 sysfs_free(sra);
325 /* 6/ Make sure /var/run/mdadm.map contains this array. */
326 map_update(&map, fd2devnum(mdfd),
327 info.text_version,
328 info.uuid, chosen_name);
329 } else {
330 /* 5b/ if it does */
331 /* - check one drive in array to make sure metadata is a reasonably */
332 /* close match. Reject if not (e.g. different type) */
333 /* - add the device */
334 char dn[20];
335 int dfd2;
336 int err;
337 struct mdinfo *sra;
338 struct supertype *st2;
339 struct mdinfo info2, *d;
340
341 if (mp->path)
342 strcpy(chosen_name, mp->path);
343 else
344 strcpy(chosen_name, devnum2devname(mp->devnum));
345
346 /* It is generally not OK to add non-spare drives to a
347 * running array as they are probably missing because
348 * they failed. However if runstop is 1, then the
349 * array was possibly started early and our best bet is
350 * to add this anyway.
351 * Also if action policy is re-add or better we allow
352 * re-add
353 */
354 if ((info.disk.state & (1<<MD_DISK_SYNC)) != 0
355 && ! policy_action_allows(policy, st->ss->name,
356 act_re_add)
357 && runstop < 1) {
358 int active = 0;
359
360 if (st->ss->external) {
361 char *devname = devnum2devname(fd2devnum(mdfd));
362
363 active = devname && is_container_active(devname);
364 free(devname);
365 } else if (ioctl(mdfd, GET_ARRAY_INFO, &ainf) == 0)
366 active = 1;
367 if (active) {
368 fprintf(stderr, Name
369 ": not adding %s to active array (without --run) %s\n",
370 devname, chosen_name);
371 rv = 2;
372 goto out;
373 }
374 }
375 sra = sysfs_read(mdfd, fd2devnum(mdfd), (GET_DEVS | GET_STATE));
376 if (!sra) {
377 rv = 2;
378 goto out;
379 }
380 if (sra->devs) {
381 sprintf(dn, "%d:%d", sra->devs->disk.major,
382 sra->devs->disk.minor);
383 dfd2 = dev_open(dn, O_RDONLY);
384 st2 = dup_super(st);
385 if (st2->ss->load_super(st2, dfd2, NULL) ||
386 st->ss->compare_super(st, st2) != 0) {
387 fprintf(stderr, Name
388 ": metadata mismatch between %s and "
389 "chosen array %s\n",
390 devname, chosen_name);
391 close(dfd2);
392 rv = 2;
393 goto out;
394 }
395 close(dfd2);
396 memset(&info2, 0, sizeof(info2));
397 st2->ss->getinfo_super(st2, &info2, NULL);
398 st2->ss->free_super(st2);
399 if (info.array.level != info2.array.level ||
400 memcmp(info.uuid, info2.uuid, 16) != 0 ||
401 info.array.raid_disks != info2.array.raid_disks) {
402 fprintf(stderr, Name
403 ": unexpected difference between %s and %s.\n",
404 chosen_name, devname);
405 rv = 2;
406 goto out;
407 }
408 }
409 info2.disk.major = major(stb.st_rdev);
410 info2.disk.minor = minor(stb.st_rdev);
411 /* add disk needs to know about containers */
412 if (st->ss->external)
413 sra->array.level = LEVEL_CONTAINER;
414 err = add_disk(mdfd, st, sra, &info2);
415 if (err < 0 && errno == EBUSY) {
416 /* could be another device present with the same
417 * disk.number. Find and reject any such
418 */
419 find_reject(mdfd, st, sra, info.disk.number,
420 info.events, verbose, chosen_name);
421 err = add_disk(mdfd, st, sra, &info2);
422 }
423 if (err < 0) {
424 fprintf(stderr, Name ": failed to add %s to %s: %s.\n",
425 devname, chosen_name, strerror(errno));
426 rv = 2;
427 goto out;
428 }
429 info.array.working_disks = 0;
430 for (d = sra->devs; d; d=d->next)
431 info.array.working_disks ++;
432
433 }
434
435 /* 7/ Is there enough devices to possibly start the array? */
436 /* 7a/ if not, finish with success. */
437 if (info.array.level == LEVEL_CONTAINER) {
438 /* Try to assemble within the container */
439 map_unlock(&map);
440 sysfs_uevent(&info, "change");
441 if (verbose >= 0)
442 fprintf(stderr, Name
443 ": container %s now has %d devices\n",
444 chosen_name, info.array.working_disks);
445 wait_for(chosen_name, mdfd);
446 close(mdfd);
447 rv = Incremental(chosen_name, verbose, runstop,
448 NULL, homehost, require_homehost, autof);
449 if (rv == 1)
450 /* Don't fail the whole -I if a subarray didn't
451 * have enough devices to start yet
452 */
453 rv = 0;
454 return rv;
455 }
456 avail = NULL;
457 active_disks = count_active(st, mdfd, &avail, &info);
458 if (enough(info.array.level, info.array.raid_disks,
459 info.array.layout, info.array.state & 1,
460 avail, active_disks) == 0) {
461 free(avail);
462 if (verbose >= 0)
463 fprintf(stderr, Name
464 ": %s attached to %s, not enough to start (%d).\n",
465 devname, chosen_name, active_disks);
466 map_unlock(&map);
467 rv = 0;
468 goto out;
469 }
470 free(avail);
471
472 /* 7b/ if yes, */
473 /* - if number of OK devices match expected, or -R and there */
474 /* are enough, */
475 /* + add any bitmap file */
476 /* + start the array (auto-readonly). */
477
478 if (ioctl(mdfd, GET_ARRAY_INFO, &ainf) == 0) {
479 if (verbose >= 0)
480 fprintf(stderr, Name
481 ": %s attached to %s which is already active.\n",
482 devname, chosen_name);
483 map_unlock(&map);
484 rv = 0;
485 goto out;
486 }
487
488 map_unlock(&map);
489 if (runstop > 0 || active_disks >= info.array.working_disks) {
490 struct mdinfo *sra, *dsk;
491 /* Let's try to start it */
492 if (match && match->bitmap_file) {
493 int bmfd = open(match->bitmap_file, O_RDWR);
494 if (bmfd < 0) {
495 fprintf(stderr, Name
496 ": Could not open bitmap file %s.\n",
497 match->bitmap_file);
498 goto out;
499 }
500 if (ioctl(mdfd, SET_BITMAP_FILE, bmfd) != 0) {
501 close(bmfd);
502 fprintf(stderr, Name
503 ": Failed to set bitmapfile for %s.\n",
504 chosen_name);
505 goto out;
506 }
507 close(bmfd);
508 }
509 /* GET_* needed so add_disk works below */
510 sra = sysfs_read(mdfd, fd2devnum(mdfd),
511 GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE);
512 if ((sra == NULL || active_disks >= info.array.working_disks)
513 && trustworthy != FOREIGN)
514 rv = ioctl(mdfd, RUN_ARRAY, NULL);
515 else
516 rv = sysfs_set_str(sra, NULL,
517 "array_state", "read-auto");
518 if (rv == 0) {
519 if (verbose >= 0)
520 fprintf(stderr, Name
521 ": %s attached to %s, which has been started.\n",
522 devname, chosen_name);
523 rv = 0;
524 wait_for(chosen_name, mdfd);
525 /* We just started the array, so some devices
526 * might have been evicted from the array
527 * because their event counts were too old.
528 * If the action=re-add policy is in-force for
529 * those devices we should re-add them now.
530 */
531 for (dsk = sra->devs; dsk ; dsk = dsk->next) {
532 if (disk_action_allows(dsk, st->ss->name, act_re_add) &&
533 add_disk(mdfd, st, sra, dsk) == 0)
534 fprintf(stderr, Name
535 ": %s re-added to %s\n",
536 dsk->sys_name, chosen_name);
537 }
538 } else {
539 fprintf(stderr, Name
540 ": %s attached to %s, but failed to start: %s.\n",
541 devname, chosen_name, strerror(errno));
542 rv = 1;
543 }
544 } else {
545 if (verbose >= 0)
546 fprintf(stderr, Name
547 ": %s attached to %s, not enough to start safely.\n",
548 devname, chosen_name);
549 rv = 0;
550 }
551 out:
552 if (dfd >= 0)
553 close(dfd);
554 if (mdfd >= 0)
555 close(mdfd);
556 if (policy)
557 dev_policy_free(policy);
558 return rv;
559 }
560
561 static struct mddev_ident_s *search_mdstat(struct supertype *st,
562 struct mdinfo *info,
563 char *devname,
564 int verbose, int *rvp)
565 {
566 struct mddev_ident_s *array_list, *match;
567 array_list = conf_get_ident(NULL);
568 match = NULL;
569 for (; array_list; array_list = array_list->next) {
570 if (array_list->uuid_set &&
571 same_uuid(array_list->uuid, info->uuid, st->ss->swapuuid)
572 == 0) {
573 if (verbose >= 2 && array_list->devname)
574 fprintf(stderr, Name
575 ": UUID differs from %s.\n",
576 array_list->devname);
577 continue;
578 }
579 if (array_list->name[0] &&
580 strcasecmp(array_list->name, info->name) != 0) {
581 if (verbose >= 2 && array_list->devname)
582 fprintf(stderr, Name
583 ": Name differs from %s.\n",
584 array_list->devname);
585 continue;
586 }
587 if (array_list->devices &&
588 !match_oneof(array_list->devices, devname)) {
589 if (verbose >= 2 && array_list->devname)
590 fprintf(stderr, Name
591 ": Not a listed device for %s.\n",
592 array_list->devname);
593 continue;
594 }
595 if (array_list->super_minor != UnSet &&
596 array_list->super_minor != info->array.md_minor) {
597 if (verbose >= 2 && array_list->devname)
598 fprintf(stderr, Name
599 ": Different super-minor to %s.\n",
600 array_list->devname);
601 continue;
602 }
603 if (!array_list->uuid_set &&
604 !array_list->name[0] &&
605 !array_list->devices &&
606 array_list->super_minor == UnSet) {
607 if (verbose >= 2 && array_list->devname)
608 fprintf(stderr, Name
609 ": %s doesn't have any identifying information.\n",
610 array_list->devname);
611 continue;
612 }
613 /* FIXME, should I check raid_disks and level too?? */
614
615 if (match) {
616 if (verbose >= 0) {
617 if (match->devname && array_list->devname)
618 fprintf(stderr, Name
619 ": we match both %s and %s - cannot decide which to use.\n",
620 match->devname, array_list->devname);
621 else
622 fprintf(stderr, Name
623 ": multiple lines in mdadm.conf match\n");
624 }
625 *rvp = 2;
626 match = NULL;
627 break;
628 }
629 match = array_list;
630 }
631 return match;
632 }
633
634 static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
635 int number, __u64 events, int verbose,
636 char *array_name)
637 {
638 /* Find a device attached to this array with a disk.number of number
639 * and events less than the passed events, and remove the device.
640 */
641 struct mdinfo *d;
642 mdu_array_info_t ra;
643
644 if (ioctl(mdfd, GET_ARRAY_INFO, &ra) == 0)
645 return; /* not safe to remove from active arrays
646 * without thinking more */
647
648 for (d = sra->devs; d ; d = d->next) {
649 char dn[10];
650 int dfd;
651 struct mdinfo info;
652 sprintf(dn, "%d:%d", d->disk.major, d->disk.minor);
653 dfd = dev_open(dn, O_RDONLY);
654 if (dfd < 0)
655 continue;
656 if (st->ss->load_super(st, dfd, NULL)) {
657 close(dfd);
658 continue;
659 }
660 st->ss->getinfo_super(st, &info, NULL);
661 st->ss->free_super(st);
662 close(dfd);
663
664 if (info.disk.number != number ||
665 info.events >= events)
666 continue;
667
668 if (d->disk.raid_disk > -1)
669 sysfs_set_str(sra, d, "slot", "none");
670 if (sysfs_set_str(sra, d, "state", "remove") == 0)
671 if (verbose >= 0)
672 fprintf(stderr, Name
673 ": removing old device %s from %s\n",
674 d->sys_name+4, array_name);
675 }
676 }
677
678 static int count_active(struct supertype *st, int mdfd, char **availp,
679 struct mdinfo *bestinfo)
680 {
681 /* count how many devices in sra think they are active */
682 struct mdinfo *d;
683 int cnt = 0, cnt1 = 0;
684 __u64 max_events = 0;
685 struct mdinfo *sra = sysfs_read(mdfd, -1, GET_DEVS | GET_STATE);
686 char *avail = NULL;
687
688 if (!sra)
689 return 0;
690
691 for (d = sra->devs ; d ; d = d->next) {
692 char dn[30];
693 int dfd;
694 int ok;
695 struct mdinfo info;
696
697 sprintf(dn, "%d:%d", d->disk.major, d->disk.minor);
698 dfd = dev_open(dn, O_RDONLY);
699 if (dfd < 0)
700 continue;
701 ok = st->ss->load_super(st, dfd, NULL);
702 close(dfd);
703 if (ok != 0)
704 continue;
705 st->ss->getinfo_super(st, &info, NULL);
706 if (!avail) {
707 avail = malloc(info.array.raid_disks);
708 if (!avail) {
709 fprintf(stderr, Name ": out of memory.\n");
710 exit(1);
711 }
712 memset(avail, 0, info.array.raid_disks);
713 *availp = avail;
714 }
715
716 if (info.disk.state & (1<<MD_DISK_SYNC))
717 {
718 if (cnt == 0) {
719 cnt++;
720 max_events = info.events;
721 avail[info.disk.raid_disk] = 2;
722 st->ss->getinfo_super(st, bestinfo, NULL);
723 } else if (info.events == max_events) {
724 cnt++;
725 avail[info.disk.raid_disk] = 2;
726 } else if (info.events == max_events-1) {
727 cnt1++;
728 avail[info.disk.raid_disk] = 1;
729 } else if (info.events < max_events - 1)
730 ;
731 else if (info.events == max_events+1) {
732 int i;
733 cnt1 = cnt;
734 cnt = 1;
735 max_events = info.events;
736 for (i=0; i<info.array.raid_disks; i++)
737 if (avail[i])
738 avail[i]--;
739 avail[info.disk.raid_disk] = 2;
740 st->ss->getinfo_super(st, bestinfo, NULL);
741 } else { /* info.events much bigger */
742 cnt = 1; cnt1 = 0;
743 memset(avail, 0, info.disk.raid_disk);
744 max_events = info.events;
745 avail[info.disk.raid_disk] = 2;
746 st->ss->getinfo_super(st, bestinfo, NULL);
747 }
748 }
749 st->ss->free_super(st);
750 }
751 return cnt + cnt1;
752 }
753
754 static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
755 struct supertype *st, int verbose)
756 {
757 /* This device doesn't have any md metadata
758 * If it is 'bare' and theh device policy allows 'spare' look for
759 * an array or container to attach it to.
760 * If st is set, then only arrays of that type are considered
761 * Return 0 on success, or some exit code on failure, probably 1.
762 */
763 int rv = -1;
764 struct stat stb;
765 struct map_ent *mp, *map = NULL;
766 struct mdinfo *chosen = NULL;
767 int dfd = *dfdp;
768
769 if (fstat(dfd, &stb) != 0)
770 return 1;
771
772 /*
773 * Now we need to find a suitable array to add this to.
774 * We only accept arrays that:
775 * - match 'st'
776 * - are in the same domains as the device
777 * - are of an size for which the device will be useful
778 * and we choose the one that is the most degraded
779 */
780
781 if (map_lock(&map)) {
782 fprintf(stderr, Name ": failed to get exclusive lock on "
783 "mapfile\n");
784 return 1;
785 }
786 for (mp = map ; mp ; mp = mp->next) {
787 struct supertype *st2;
788 struct domainlist *dl = NULL;
789 struct mdinfo *sra;
790 unsigned long long devsize;
791
792 if (is_subarray(mp->metadata))
793 continue;
794 if (st) {
795 st2 = st->ss->match_metadata_desc(mp->metadata);
796 if (!st2 ||
797 (st->minor_version >= 0 &&
798 st->minor_version != st2->minor_version)) {
799 if (verbose > 1)
800 fprintf(stderr, Name ": not adding %s to %s as metadata type doesn't match\n",
801 devname, mp->path);
802 free(st2);
803 continue;
804 }
805 free(st2);
806 }
807 sra = sysfs_read(-1, mp->devnum,
808 GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
809 GET_DEGRADED|GET_COMPONENT|GET_VERSION);
810 if (!sra) {
811 /* Probably a container - no degraded info */
812 sra = sysfs_read(-1, mp->devnum,
813 GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
814 GET_COMPONENT|GET_VERSION);
815 if (sra)
816 sra->array.failed_disks = 0;
817 }
818 if (!sra)
819 continue;
820 if (st == NULL) {
821 int i;
822 st2 = NULL;
823 for(i=0; !st2 && superlist[i]; i++)
824 st2 = superlist[i]->match_metadata_desc(
825 sra->text_version);
826 } else
827 st2 = st;
828 get_dev_size(dfd, NULL, &devsize);
829 if (st2->ss->avail_size(st2, devsize) < sra->component_size) {
830 if (verbose > 1)
831 fprintf(stderr, Name ": not adding %s to %s as it is too small\n",
832 devname, mp->path);
833 goto next;
834 }
835 dl = domain_from_array(sra, st2->ss->name);
836 if (!domain_test(dl, pol, st2->ss->name)) {
837 /* domain test fails */
838 if (verbose > 1)
839 fprintf(stderr, Name ": not adding %s to %s as it is not in a compatible domain\n",
840 devname, mp->path);
841
842 goto next;
843 }
844 /* all tests passed, OK to add to this array */
845 if (!chosen) {
846 chosen = sra;
847 sra = NULL;
848 } else if (chosen->array.failed_disks < sra->array.failed_disks) {
849 sysfs_free(chosen);
850 chosen = sra;
851 sra = NULL;
852 }
853 next:
854 if (sra)
855 sysfs_free(sra);
856 if (st != st2)
857 free(st2);
858 if (dl)
859 domain_free(dl);
860 }
861 if (chosen) {
862 /* add current device to chosen array as a spare */
863 int mdfd = open_dev(devname2devnum(chosen->sys_name));
864 if (mdfd >= 0) {
865 struct mddev_dev_s devlist;
866 char devname[20];
867 devlist.next = NULL;
868 devlist.used = 0;
869 devlist.re_add = 0;
870 devlist.writemostly = 0;
871 devlist.devname = devname;
872 sprintf(devname, "%d:%d", major(stb.st_rdev),
873 minor(stb.st_rdev));
874 devlist.disposition = 'a';
875 close(dfd);
876 *dfdp = -1;
877 rv = Manage_subdevs(chosen->sys_name, mdfd, &devlist,
878 -1, 0);
879 close(mdfd);
880 }
881 if (verbose > 0) {
882 if (rv == 0)
883 fprintf(stderr, Name ": added %s as spare for %s\n",
884 devname, chosen->sys_name);
885 else
886 fprintf(stderr, Name ": failed to add %s as spare for %s\n",
887 devname, chosen->sys_name);
888 }
889 sysfs_free(chosen);
890 }
891 return rv ? 0 : 1;
892 }
893
894 static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
895 struct supertype *st, int verbose)
896 {
897 /* we know that at least one partition virtual-metadata is
898 * allowed to incorporate spares like this device. We need to
899 * find a suitable device to copy partition information from.
900 *
901 * Getting a list of all disk (not partition) devices is
902 * slightly non-trivial. We could look at /sys/block, but
903 * that is theoretically due to be removed. Maybe best to use
904 * /dev/disk/by-path/?* and ignore names ending '-partNN' as
905 * we depend on this directory of 'path' info. But that fails
906 * to find loop devices and probably others. Maybe don't
907 * worry about that, they aren't the real target.
908 *
909 * So: check things in /dev/disk/by-path to see if they are in
910 * a compatible domain, then load the partition table and see
911 * if it is OK for the new device, and choose the largest
912 * partition table that fits.
913 */
914 DIR *dir;
915 struct dirent *de;
916 char *chosen = NULL;
917 unsigned long long chosen_size;
918 struct supertype *chosen_st = NULL;
919 int fd;
920
921 dir = opendir("/dev/disk/by-path");
922 if (!dir)
923 return 1;
924 while ((de = readdir(dir)) != NULL) {
925 char *ep;
926 struct dev_policy *pol2 = NULL;
927 struct domainlist *domlist = NULL;
928 int fd = -1;
929 struct mdinfo info;
930 struct supertype *st2 = NULL;
931 char *devname = NULL;
932 unsigned long long devsectors;
933
934 if (de->d_ino == 0 ||
935 de->d_name[0] == '.' ||
936 (de->d_type != DT_LNK && de->d_type != DT_UNKNOWN))
937 goto next;
938
939 ep = de->d_name + strlen(de->d_name);
940 while (ep > de->d_name &&
941 isdigit(ep[-1]))
942 ep--;
943 if (ep > de->d_name + 5 &&
944 strncmp(ep-5, "-part", 5) == 0)
945 /* This is a partition - skip it */
946 goto next;
947
948 pol2 = path_policy(de->d_name, type_disk);
949
950 domain_merge(&domlist, pol2, st ? st->ss->name : NULL);
951 if (domain_test(domlist, pol, st ? st->ss->name : NULL) == 0)
952 /* new device is incompatible with this device. */
953 goto next;
954
955 domain_free(domlist);
956 domlist = NULL;
957
958 asprintf(&devname, "/dev/disk/by-path/%s", de->d_name);
959 fd = open(devname, O_RDONLY);
960 if (fd < 0)
961 goto next;
962 if (get_dev_size(fd, devname, &devsectors) == 0)
963 goto next;
964 devsectors >>= 9;
965
966 if (st)
967 st2 = dup_super(st);
968 else
969 st2 = guess_super_type(fd, guess_partitions);
970 if (st2 == NULL ||
971 st2->ss->load_super(st2, fd, NULL) < 0)
972 goto next;
973
974 if (!st) {
975 /* Check domain policy again, this time referring to metadata */
976 domain_merge(&domlist, pol2, st2->ss->name);
977 if (domain_test(domlist, pol, st2->ss->name) == 0)
978 /* Incompatible devices for this metadata type */
979 goto next;
980 }
981
982 st2->ss->getinfo_super(st2, &info, NULL);
983 if (info.component_size > devsectors)
984 /* This partitioning doesn't fit in the device */
985 goto next;
986
987 /* This is an acceptable device to copy partition
988 * metadata from. We could just stop here, but I
989 * think I want to keep looking incase a larger
990 * metadata which makes better use of the device can
991 * be found.
992 */
993 if (chosen == NULL ||
994 chosen_size < info.component_size) {
995 chosen_size = info.component_size;
996 free(chosen);
997 chosen = devname;
998 devname = NULL;
999 if (chosen_st) {
1000 chosen_st->ss->free_super(chosen_st);
1001 free(chosen_st);
1002 }
1003 chosen_st = st2;
1004 st2 = NULL;
1005 }
1006
1007 next:
1008 free(devname);
1009 domain_free(domlist);
1010 dev_policy_free(pol2);
1011 if (st2)
1012 st2->ss->free_super(st2);
1013 free(st2);
1014
1015 if (fd >= 0)
1016 close(fd);
1017 }
1018
1019 if (!chosen)
1020 return 1;
1021
1022 /* 'chosen' is the best device we can find. Let's write its
1023 * metadata to devname dfd is read-only so don't use that
1024 */
1025 fd = open(devname, O_RDWR);
1026 if (fd >= 0) {
1027 chosen_st->ss->store_super(chosen_st, fd);
1028 close(fd);
1029 }
1030 free(chosen);
1031 chosen_st->ss->free_super(chosen_st);
1032 free(chosen_st);
1033 return 0;
1034 }
1035
1036
1037 /* adding a spare to a regular array is quite different from adding one to
1038 * a set-of-partitions virtual array.
1039 * This function determines which is worth trying and tries as appropriate.
1040 * Arrays are given priority over partitions.
1041 */
1042 static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
1043 struct supertype *st, int verbose)
1044 {
1045 int i;
1046 int rv;
1047 int arrays_ok = 0;
1048 int partitions_ok = 0;
1049 char bufpad[4096 + 4096];
1050 char *buf = (char*)(((long)bufpad + 4096) & ~4095);
1051 int dfd = *dfdp;
1052
1053 /* Can only add a spare if device has at least one domains */
1054 if (pol_find(pol, pol_domain) == NULL)
1055 return 1;
1056 /* And only if some action allows spares */
1057 if (!policy_action_allows(pol, st?st->ss->name:NULL, act_spare))
1058 return 1;
1059
1060 /* Now check if the device is bare - we don't add non-bare devices
1061 * yet even if action=-spare
1062 */
1063
1064 if (lseek(dfd, 0, SEEK_SET) != 0 ||
1065 read(dfd, buf, 4096) != 4096) {
1066 not_bare:
1067 if (verbose > 1)
1068 fprintf(stderr, Name ": %s is not bare, so not considering as a spare\n",
1069 devname);
1070 return 1;
1071 }
1072 if (buf[0] != '\0' && buf[0] != '\x5a' && buf[0] != '\xff')
1073 goto not_bare;
1074 if (memcmp(buf, buf+1, 4095) != 0)
1075 goto not_bare;
1076
1077 /* OK, first 4K appear blank, try the end. */
1078 if (lseek(dfd, -4096, SEEK_END) < 0 ||
1079 read(dfd, buf, 4096) != 4096)
1080 goto not_bare;
1081
1082 if (buf[0] != '\0' && buf[0] != '\x5a' && buf[0] != '\xff')
1083 goto not_bare;
1084 if (memcmp(buf, buf+1, 4095) != 0)
1085 goto not_bare;
1086
1087 /* This device passes our test for 'is bare'.
1088 * Let's see what policy allows for such things.
1089 */
1090 if (st) {
1091 /* just try try 'array' or 'partition' based on this metadata */
1092 if (st->ss->add_to_super)
1093 return array_try_spare(devname, dfdp, pol,
1094 st, verbose);
1095 else
1096 return partition_try_spare(devname, dfdp, pol,
1097 st, verbose);
1098 }
1099 /* Now see which metadata type support spare */
1100 for (i = 0; (!arrays_ok || !partitions_ok) && superlist[i] ; i++) {
1101 if (superlist[i]->add_to_super && !arrays_ok &&
1102 policy_action_allows(pol, superlist[i]->name, act_spare))
1103 arrays_ok = 1;
1104 if (superlist[i]->add_to_super == NULL && !partitions_ok &&
1105 policy_action_allows(pol, superlist[i]->name, act_spare))
1106 partitions_ok = 1;
1107 }
1108 rv = 0;
1109 if (arrays_ok)
1110 rv = array_try_spare(devname, dfdp, pol, st, verbose);
1111 if (rv == 0 && partitions_ok)
1112 rv = partition_try_spare(devname, dfdp, pol, st, verbose);
1113 return rv;
1114 }
1115
1116 int IncrementalScan(int verbose)
1117 {
1118 /* look at every device listed in the 'map' file.
1119 * If one is found that is not running then:
1120 * look in mdadm.conf for bitmap file.
1121 * if one exists, but array has none, add it.
1122 * try to start array in auto-readonly mode
1123 */
1124 struct map_ent *mapl = NULL;
1125 struct map_ent *me;
1126 mddev_ident_t devs, mddev;
1127 int rv = 0;
1128
1129 map_read(&mapl);
1130 devs = conf_get_ident(NULL);
1131
1132 for (me = mapl ; me ; me = me->next) {
1133 mdu_array_info_t array;
1134 mdu_bitmap_file_t bmf;
1135 struct mdinfo *sra;
1136 int mdfd = open_dev(me->devnum);
1137
1138 if (mdfd < 0)
1139 continue;
1140 if (ioctl(mdfd, GET_ARRAY_INFO, &array) == 0 ||
1141 errno != ENODEV) {
1142 close(mdfd);
1143 continue;
1144 }
1145 /* Ok, we can try this one. Maybe it needs a bitmap */
1146 for (mddev = devs ; mddev ; mddev = mddev->next)
1147 if (mddev->devname && me->path
1148 && devname_matches(mddev->devname, me->path))
1149 break;
1150 if (mddev && mddev->bitmap_file) {
1151 /*
1152 * Note: early kernels will wrongly fail this, so it
1153 * is a hint only
1154 */
1155 int added = -1;
1156 if (ioctl(mdfd, GET_ARRAY_INFO, &bmf) < 0) {
1157 int bmfd = open(mddev->bitmap_file, O_RDWR);
1158 if (bmfd >= 0) {
1159 added = ioctl(mdfd, SET_BITMAP_FILE,
1160 bmfd);
1161 close(bmfd);
1162 }
1163 }
1164 if (verbose >= 0) {
1165 if (added == 0)
1166 fprintf(stderr, Name
1167 ": Added bitmap %s to %s\n",
1168 mddev->bitmap_file, me->path);
1169 else if (errno != EEXIST)
1170 fprintf(stderr, Name
1171 ": Failed to add bitmap to %s: %s\n",
1172 me->path, strerror(errno));
1173 }
1174 }
1175 sra = sysfs_read(mdfd, 0, 0);
1176 if (sra) {
1177 if (sysfs_set_str(sra, NULL,
1178 "array_state", "read-auto") == 0) {
1179 if (verbose >= 0)
1180 fprintf(stderr, Name
1181 ": started array %s\n",
1182 me->path ?: devnum2devname(me->devnum));
1183 } else {
1184 fprintf(stderr, Name
1185 ": failed to start array %s: %s\n",
1186 me->path ?: devnum2devname(me->devnum),
1187 strerror(errno));
1188 rv = 1;
1189 }
1190 }
1191 }
1192 return rv;
1193 }
1194
1195 static char *container2devname(char *devname)
1196 {
1197 char *mdname = NULL;
1198
1199 if (devname[0] == '/') {
1200 int fd = open(devname, O_RDONLY);
1201 if (fd >= 0) {
1202 mdname = devnum2devname(fd2devnum(fd));
1203 close(fd);
1204 }
1205 } else {
1206 int uuid[4];
1207 struct map_ent *mp, *map = NULL;
1208
1209 if (!parse_uuid(devname, uuid))
1210 return mdname;
1211 mp = map_by_uuid(&map, uuid);
1212 if (mp)
1213 mdname = devnum2devname(mp->devnum);
1214 map_free(map);
1215 }
1216
1217 return mdname;
1218 }
1219
1220 static int Incremental_container(struct supertype *st, char *devname,
1221 char *homehost, int verbose,
1222 int runstop, int autof)
1223 {
1224 /* Collect the contents of this container and for each
1225 * array, choose a device name and assemble the array.
1226 */
1227
1228 struct mdinfo *list;
1229 struct mdinfo *ra;
1230 struct map_ent *map = NULL;
1231 struct mdinfo info;
1232 int trustworthy;
1233 struct mddev_ident_s *match;
1234 int rv = 0;
1235
1236 memset(&info, 0, sizeof(info));
1237 st->ss->getinfo_super(st, &info, NULL);
1238
1239 if ((runstop > 0 && info.container_enough >= 0) ||
1240 info.container_enough > 0)
1241 /* pass */;
1242 else {
1243 if (verbose)
1244 fprintf(stderr, Name ": not enough devices to start the container\n");
1245 return 0;
1246 }
1247
1248 match = search_mdstat(st, &info, devname, verbose, &rv);
1249 if (match == NULL && rv == 2)
1250 return rv;
1251
1252 /* Need to compute 'trustworthy' */
1253 if (match)
1254 trustworthy = LOCAL;
1255 else if (st->ss->match_home(st, homehost) == 1)
1256 trustworthy = LOCAL;
1257 else if (st->ss->match_home(st, "any") == 1)
1258 trustworthy = LOCAL;
1259 else
1260 trustworthy = FOREIGN;
1261
1262 list = st->ss->container_content(st, NULL);
1263 if (map_lock(&map))
1264 fprintf(stderr, Name ": failed to get exclusive lock on "
1265 "mapfile\n");
1266
1267 for (ra = list ; ra ; ra = ra->next) {
1268 int mdfd;
1269 char chosen_name[1024];
1270 struct map_ent *mp;
1271 struct mddev_ident_s *match = NULL;
1272
1273 mp = map_by_uuid(&map, ra->uuid);
1274
1275 if (mp) {
1276 mdfd = open_dev(mp->devnum);
1277 if (mp->path)
1278 strcpy(chosen_name, mp->path);
1279 else
1280 strcpy(chosen_name, devnum2devname(mp->devnum));
1281 } else {
1282
1283 /* Check in mdadm.conf for container == devname and
1284 * member == ra->text_version after second slash.
1285 */
1286 char *sub = strchr(ra->text_version+1, '/');
1287 struct mddev_ident_s *array_list;
1288 if (sub) {
1289 sub++;
1290 array_list = conf_get_ident(NULL);
1291 } else
1292 array_list = NULL;
1293 for(; array_list ; array_list = array_list->next) {
1294 char *dn;
1295 if (array_list->member == NULL ||
1296 array_list->container == NULL)
1297 continue;
1298 if (strcmp(array_list->member, sub) != 0)
1299 continue;
1300 if (array_list->uuid_set &&
1301 !same_uuid(ra->uuid, array_list->uuid, st->ss->swapuuid))
1302 continue;
1303 dn = container2devname(array_list->container);
1304 if (dn == NULL)
1305 continue;
1306 if (strncmp(dn, ra->text_version+1,
1307 strlen(dn)) != 0 ||
1308 ra->text_version[strlen(dn)+1] != '/') {
1309 free(dn);
1310 continue;
1311 }
1312 free(dn);
1313 /* we have a match */
1314 match = array_list;
1315 if (verbose>0)
1316 fprintf(stderr, Name ": match found for member %s\n",
1317 array_list->member);
1318 break;
1319 }
1320
1321 if (match && match->devname &&
1322 strcasecmp(match->devname, "<ignore>") == 0) {
1323 if (verbose > 0)
1324 fprintf(stderr, Name ": array %s/%s is "
1325 "explicitly ignored by mdadm.conf\n",
1326 match->container, match->member);
1327 return 2;
1328 }
1329 if (match)
1330 trustworthy = LOCAL;
1331
1332 mdfd = create_mddev(match ? match->devname : NULL,
1333 ra->name,
1334 autof,
1335 trustworthy,
1336 chosen_name);
1337 }
1338
1339 if (mdfd < 0) {
1340 fprintf(stderr, Name ": failed to open %s: %s.\n",
1341 chosen_name, strerror(errno));
1342 return 2;
1343 }
1344
1345 assemble_container_content(st, mdfd, ra, runstop,
1346 chosen_name, verbose);
1347 }
1348 map_unlock(&map);
1349 return 0;
1350 }
1351
1352 /*
1353 * IncrementalRemove - Attempt to see if the passed in device belongs to any
1354 * raid arrays, and if so first fail (if needed) and then remove the device.
1355 *
1356 * @devname - The device we want to remove
1357 *
1358 * Note: the device name must be a kernel name like "sda", so
1359 * that we can find it in /proc/mdstat
1360 */
1361 int IncrementalRemove(char *devname, int verbose)
1362 {
1363 int mdfd;
1364 int rv;
1365 struct mdstat_ent *ent;
1366 struct mddev_dev_s devlist;
1367
1368 if (strchr(devname, '/')) {
1369 fprintf(stderr, Name ": incremental removal requires a "
1370 "kernel device name, not a file: %s\n", devname);
1371 return 1;
1372 }
1373 ent = mdstat_by_component(devname);
1374 if (!ent) {
1375 fprintf(stderr, Name ": %s does not appear to be a component "
1376 "of any array\n", devname);
1377 return 1;
1378 }
1379 mdfd = open_dev(ent->devnum);
1380 if (mdfd < 0) {
1381 fprintf(stderr, Name ": Cannot open array %s!!\n", ent->dev);
1382 return 1;
1383 }
1384 memset(&devlist, 0, sizeof(devlist));
1385 devlist.devname = devname;
1386 devlist.disposition = 'f';
1387 Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0);
1388 devlist.disposition = 'r';
1389 rv = Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0);
1390 close(mdfd);
1391 return rv;
1392 }