]> git.ipfire.org Git - thirdparty/mdadm.git/blob - Incremental.c
Add must_be_container helper.
[thirdparty/mdadm.git] / Incremental.c
1 /*
2 * Incremental.c - support --incremental. Part of:
3 * mdadm - manage Linux "md" devices aka RAID arrays.
4 *
5 * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
6 *
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 *
22 * Author: Neil Brown
23 * Email: <neilb@suse.de>
24 * Paper: Neil Brown
25 * Novell Inc
26 * GPO Box Q1283
27 * QVB Post Office, NSW 1230
28 * Australia
29 */
30
31 #include "mdadm.h"
32 #include <dirent.h>
33 #include <ctype.h>
34
35 static int count_active(struct supertype *st, int mdfd, char **availp,
36 struct mdinfo *info);
37 static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
38 int number, __u64 events, int verbose,
39 char *array_name);
40 static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
41 struct supertype *st, int verbose);
42
43 static int Incremental_container(struct supertype *st, char *devname,
44 char *homehost,
45 int verbose, int runstop, int autof);
46
47 static struct mddev_ident *search_mdstat(struct supertype *st,
48 struct mdinfo *info,
49 char *devname,
50 int verbose, int *rvp);
51
52 int Incremental(char *devname, int verbose, int runstop,
53 struct supertype *st, char *homehost, int require_homehost,
54 int autof)
55 {
56 /* Add this device to an array, creating the array if necessary
57 * and starting the array if sensible or - if runstop>0 - if possible.
58 *
59 * This has several steps:
60 *
61 * 1/ Check if device is permitted by mdadm.conf, reject if not.
62 * 2/ Find metadata, reject if none appropriate (check
63 * version/name from args)
64 * 3/ Check if there is a match in mdadm.conf
65 * 3a/ if not, check for homehost match. If no match, assemble as
66 * a 'foreign' array.
67 * 4/ Determine device number.
68 * - If in mdadm.conf with std name, use that
69 * - UUID in /var/run/mdadm.map use that
70 * - If name is suggestive, use that. unless in use with different uuid.
71 * - Choose a free, high number.
72 * - Use a partitioned device unless strong suggestion not to.
73 * e.g. auto=md
74 * Don't choose partitioned for containers.
75 * 5/ Find out if array already exists
76 * 5a/ if it does not
77 * - choose a name, from mdadm.conf or 'name' field in array.
78 * - create the array
79 * - add the device
80 * 5b/ if it does
81 * - check one drive in array to make sure metadata is a reasonably
82 * close match. Reject if not (e.g. different type)
83 * - add the device
84 * 6/ Make sure /var/run/mdadm.map contains this array.
85 * 7/ Is there enough devices to possibly start the array?
86 * For a container, this means running Incremental_container.
87 * 7a/ if not, finish with success.
88 * 7b/ if yes,
89 * - read all metadata and arrange devices like -A does
90 * - if number of OK devices match expected, or -R and there are enough,
91 * start the array (auto-readonly).
92 */
93 struct stat stb;
94 struct mdinfo info, dinfo;
95 struct mddev_ident *match;
96 char chosen_name[1024];
97 int rv = 1;
98 struct map_ent *mp, *map = NULL;
99 int dfd = -1, mdfd = -1;
100 char *avail;
101 int active_disks;
102 int trustworthy;
103 char *name_to_use;
104 mdu_array_info_t ainf;
105 struct dev_policy *policy = NULL;
106
107 struct createinfo *ci = conf_get_create_info();
108
109 if (stat(devname, &stb) < 0) {
110 if (verbose >= 0)
111 fprintf(stderr, Name ": stat failed for %s: %s.\n",
112 devname, strerror(errno));
113 return rv;
114 }
115 if ((stb.st_mode & S_IFMT) != S_IFBLK) {
116 if (verbose >= 0)
117 fprintf(stderr, Name ": %s is not a block device.\n",
118 devname);
119 return rv;
120 }
121 dfd = dev_open(devname, O_RDONLY|O_EXCL);
122 if (dfd < 0) {
123 if (verbose >= 0)
124 fprintf(stderr, Name ": cannot open %s: %s.\n",
125 devname, strerror(errno));
126 return rv;
127 }
128 /* If the device is a container, we do something very different */
129 if (must_be_container(dfd)) {
130 if (!st)
131 st = super_by_fd(dfd, NULL);
132 if (st)
133 rv = st->ss->load_container(st, dfd, NULL);
134
135 close(dfd);
136 if (!rv && st->ss->container_content)
137 return Incremental_container(st, devname, homehost,
138 verbose, runstop, autof);
139
140 fprintf(stderr, Name ": %s is not part of an md array.\n",
141 devname);
142 return rv;
143 }
144
145 /* 1/ Check if device is permitted by mdadm.conf */
146
147 if (!conf_test_dev(devname)) {
148 if (verbose >= 0)
149 fprintf(stderr, Name
150 ": %s not permitted by mdadm.conf.\n",
151 devname);
152 goto out;
153 }
154
155 /* 2/ Find metadata, reject if none appropriate (check
156 * version/name from args) */
157
158 if (fstat(dfd, &stb) < 0) {
159 if (verbose >= 0)
160 fprintf(stderr, Name ": fstat failed for %s: %s.\n",
161 devname, strerror(errno));
162 goto out;
163 }
164 if ((stb.st_mode & S_IFMT) != S_IFBLK) {
165 if (verbose >= 0)
166 fprintf(stderr, Name ": %s is not a block device.\n",
167 devname);
168 goto out;
169 }
170
171 dinfo.disk.major = major(stb.st_rdev);
172 dinfo.disk.minor = minor(stb.st_rdev);
173
174 policy = disk_policy(&dinfo);
175
176 if (st == NULL && (st = guess_super(dfd)) == NULL) {
177 if (verbose >= 0)
178 fprintf(stderr, Name
179 ": no recognisable superblock on %s.\n",
180 devname);
181 rv = try_spare(devname, &dfd, policy, st, verbose);
182 goto out;
183 }
184 if (st->ss->compare_super == NULL ||
185 st->ss->load_super(st, dfd, NULL)) {
186 if (verbose >= 0)
187 fprintf(stderr, Name ": no RAID superblock on %s.\n",
188 devname);
189 rv = try_spare(devname, &dfd, policy, st, verbose);
190 free(st);
191 goto out;
192 }
193 close (dfd); dfd = -1;
194
195 memset(&info, 0, sizeof(info));
196 st->ss->getinfo_super(st, &info, NULL);
197
198 /* 3/ Check if there is a match in mdadm.conf */
199 match = search_mdstat(st, &info, devname, verbose, &rv);
200 if (!match && rv == 2)
201 goto out;
202
203 if (match && match->devname
204 && strcasecmp(match->devname, "<ignore>") == 0) {
205 if (verbose >= 0)
206 fprintf(stderr, Name ": array containing %s is explicitly"
207 " ignored by mdadm.conf\n",
208 devname);
209 goto out;
210 }
211
212 /* 3a/ if not, check for homehost match. If no match, continue
213 * but don't trust the 'name' in the array. Thus a 'random' minor
214 * number will be assigned, and the device name will be based
215 * on that. */
216 if (match)
217 trustworthy = LOCAL;
218 else if (st->ss->match_home(st, homehost) == 1)
219 trustworthy = LOCAL;
220 else if (st->ss->match_home(st, "any") == 1)
221 trustworthy = LOCAL_ANY;
222 else
223 trustworthy = FOREIGN;
224
225
226 if (!match && !conf_test_metadata(st->ss->name, policy,
227 (trustworthy == LOCAL))) {
228 if (verbose >= 1)
229 fprintf(stderr, Name
230 ": %s has metadata type %s for which "
231 "auto-assembly is disabled\n",
232 devname, st->ss->name);
233 goto out;
234 }
235 if (trustworthy == LOCAL_ANY)
236 trustworthy = LOCAL;
237
238 /* There are three possible sources for 'autof': command line,
239 * ARRAY line in mdadm.conf, or CREATE line in mdadm.conf.
240 * ARRAY takes precedence, then command line, then
241 * CREATE.
242 */
243 if (match && match->autof)
244 autof = match->autof;
245 if (autof == 0)
246 autof = ci->autof;
247
248 name_to_use = info.name;
249 if (name_to_use[0] == 0 &&
250 info.array.level == LEVEL_CONTAINER &&
251 trustworthy == LOCAL) {
252 name_to_use = info.text_version;
253 trustworthy = METADATA;
254 }
255 if (name_to_use[0] && trustworthy != LOCAL &&
256 ! require_homehost &&
257 conf_name_is_free(name_to_use))
258 trustworthy = LOCAL;
259
260 /* strip "hostname:" prefix from name if we have decided
261 * to treat it as LOCAL
262 */
263 if (trustworthy == LOCAL && strchr(name_to_use, ':') != NULL)
264 name_to_use = strchr(name_to_use, ':')+1;
265
266 /* 4/ Check if array exists.
267 */
268 if (map_lock(&map))
269 fprintf(stderr, Name ": failed to get exclusive lock on "
270 "mapfile\n");
271 mp = map_by_uuid(&map, info.uuid);
272 if (mp)
273 mdfd = open_dev(mp->devnum);
274 else
275 mdfd = -1;
276
277 if (mdfd < 0) {
278 struct mdinfo *sra;
279
280 /* Couldn't find an existing array, maybe make a new one */
281 mdfd = create_mddev(match ? match->devname : NULL,
282 name_to_use, autof, trustworthy, chosen_name);
283
284 if (mdfd < 0)
285 goto out;
286
287 sysfs_init(&info, mdfd, 0);
288
289 if (set_array_info(mdfd, st, &info) != 0) {
290 fprintf(stderr, Name ": failed to set array info for %s: %s\n",
291 chosen_name, strerror(errno));
292 rv = 2;
293 goto out;
294 }
295
296 dinfo = info;
297 dinfo.disk.major = major(stb.st_rdev);
298 dinfo.disk.minor = minor(stb.st_rdev);
299 if (add_disk(mdfd, st, &info, &dinfo) != 0) {
300 fprintf(stderr, Name ": failed to add %s to %s: %s.\n",
301 devname, chosen_name, strerror(errno));
302 ioctl(mdfd, STOP_ARRAY, 0);
303 rv = 2;
304 goto out;
305 }
306 sra = sysfs_read(mdfd, fd2devnum(mdfd), GET_DEVS);
307 if (!sra || !sra->devs || sra->devs->disk.raid_disk >= 0) {
308 /* It really should be 'none' - must be old buggy
309 * kernel, and mdadm -I may not be able to complete.
310 * So reject it.
311 */
312 ioctl(mdfd, STOP_ARRAY, NULL);
313 fprintf(stderr, Name
314 ": You have an old buggy kernel which cannot support\n"
315 " --incremental reliably. Aborting.\n");
316 sysfs_free(sra);
317 rv = 2;
318 goto out;
319 }
320 info.array.working_disks = 1;
321 sysfs_free(sra);
322 /* 6/ Make sure /var/run/mdadm.map contains this array. */
323 map_update(&map, fd2devnum(mdfd),
324 info.text_version,
325 info.uuid, chosen_name);
326 } else {
327 /* 5b/ if it does */
328 /* - check one drive in array to make sure metadata is a reasonably */
329 /* close match. Reject if not (e.g. different type) */
330 /* - add the device */
331 char dn[20];
332 int dfd2;
333 int err;
334 struct mdinfo *sra;
335 struct supertype *st2;
336 struct mdinfo info2, *d;
337
338 if (mp->path)
339 strcpy(chosen_name, mp->path);
340 else
341 strcpy(chosen_name, devnum2devname(mp->devnum));
342
343 /* It is generally not OK to add non-spare drives to a
344 * running array as they are probably missing because
345 * they failed. However if runstop is 1, then the
346 * array was possibly started early and our best bet is
347 * to add this anyway.
348 * Also if action policy is re-add or better we allow
349 * re-add
350 */
351 if ((info.disk.state & (1<<MD_DISK_SYNC)) != 0
352 && ! policy_action_allows(policy, st->ss->name,
353 act_re_add)
354 && runstop < 1) {
355 int active = 0;
356
357 if (st->ss->external) {
358 char *devname = devnum2devname(fd2devnum(mdfd));
359
360 active = devname && is_container_active(devname);
361 free(devname);
362 } else if (ioctl(mdfd, GET_ARRAY_INFO, &ainf) == 0)
363 active = 1;
364 if (active) {
365 fprintf(stderr, Name
366 ": not adding %s to active array (without --run) %s\n",
367 devname, chosen_name);
368 rv = 2;
369 goto out;
370 }
371 }
372 sra = sysfs_read(mdfd, fd2devnum(mdfd), (GET_DEVS | GET_STATE));
373 if (!sra) {
374 rv = 2;
375 goto out;
376 }
377 if (sra->devs) {
378 sprintf(dn, "%d:%d", sra->devs->disk.major,
379 sra->devs->disk.minor);
380 dfd2 = dev_open(dn, O_RDONLY);
381 st2 = dup_super(st);
382 if (st2->ss->load_super(st2, dfd2, NULL) ||
383 st->ss->compare_super(st, st2) != 0) {
384 fprintf(stderr, Name
385 ": metadata mismatch between %s and "
386 "chosen array %s\n",
387 devname, chosen_name);
388 close(dfd2);
389 rv = 2;
390 goto out;
391 }
392 close(dfd2);
393 memset(&info2, 0, sizeof(info2));
394 st2->ss->getinfo_super(st2, &info2, NULL);
395 st2->ss->free_super(st2);
396 if (info.array.level != info2.array.level ||
397 memcmp(info.uuid, info2.uuid, 16) != 0 ||
398 info.array.raid_disks != info2.array.raid_disks) {
399 fprintf(stderr, Name
400 ": unexpected difference between %s and %s.\n",
401 chosen_name, devname);
402 rv = 2;
403 goto out;
404 }
405 }
406 info2.disk.major = major(stb.st_rdev);
407 info2.disk.minor = minor(stb.st_rdev);
408 /* add disk needs to know about containers */
409 if (st->ss->external)
410 sra->array.level = LEVEL_CONTAINER;
411 err = add_disk(mdfd, st, sra, &info2);
412 if (err < 0 && errno == EBUSY) {
413 /* could be another device present with the same
414 * disk.number. Find and reject any such
415 */
416 find_reject(mdfd, st, sra, info.disk.number,
417 info.events, verbose, chosen_name);
418 err = add_disk(mdfd, st, sra, &info2);
419 }
420 if (err < 0) {
421 fprintf(stderr, Name ": failed to add %s to %s: %s.\n",
422 devname, chosen_name, strerror(errno));
423 rv = 2;
424 goto out;
425 }
426 info.array.working_disks = 0;
427 for (d = sra->devs; d; d=d->next)
428 info.array.working_disks ++;
429
430 }
431
432 /* 7/ Is there enough devices to possibly start the array? */
433 /* 7a/ if not, finish with success. */
434 if (info.array.level == LEVEL_CONTAINER) {
435 /* Try to assemble within the container */
436 map_unlock(&map);
437 sysfs_uevent(&info, "change");
438 if (verbose >= 0)
439 fprintf(stderr, Name
440 ": container %s now has %d devices\n",
441 chosen_name, info.array.working_disks);
442 wait_for(chosen_name, mdfd);
443 close(mdfd);
444 rv = Incremental(chosen_name, verbose, runstop,
445 NULL, homehost, require_homehost, autof);
446 if (rv == 1)
447 /* Don't fail the whole -I if a subarray didn't
448 * have enough devices to start yet
449 */
450 rv = 0;
451 return rv;
452 }
453 avail = NULL;
454 active_disks = count_active(st, mdfd, &avail, &info);
455 if (enough(info.array.level, info.array.raid_disks,
456 info.array.layout, info.array.state & 1,
457 avail, active_disks) == 0) {
458 free(avail);
459 if (verbose >= 0)
460 fprintf(stderr, Name
461 ": %s attached to %s, not enough to start (%d).\n",
462 devname, chosen_name, active_disks);
463 map_unlock(&map);
464 rv = 0;
465 goto out;
466 }
467 free(avail);
468
469 /* 7b/ if yes, */
470 /* - if number of OK devices match expected, or -R and there */
471 /* are enough, */
472 /* + add any bitmap file */
473 /* + start the array (auto-readonly). */
474
475 if (ioctl(mdfd, GET_ARRAY_INFO, &ainf) == 0) {
476 if (verbose >= 0)
477 fprintf(stderr, Name
478 ": %s attached to %s which is already active.\n",
479 devname, chosen_name);
480 map_unlock(&map);
481 rv = 0;
482 goto out;
483 }
484
485 map_unlock(&map);
486 if (runstop > 0 || active_disks >= info.array.working_disks) {
487 struct mdinfo *sra, *dsk;
488 /* Let's try to start it */
489 if (match && match->bitmap_file) {
490 int bmfd = open(match->bitmap_file, O_RDWR);
491 if (bmfd < 0) {
492 fprintf(stderr, Name
493 ": Could not open bitmap file %s.\n",
494 match->bitmap_file);
495 goto out;
496 }
497 if (ioctl(mdfd, SET_BITMAP_FILE, bmfd) != 0) {
498 close(bmfd);
499 fprintf(stderr, Name
500 ": Failed to set bitmapfile for %s.\n",
501 chosen_name);
502 goto out;
503 }
504 close(bmfd);
505 }
506 /* GET_* needed so add_disk works below */
507 sra = sysfs_read(mdfd, fd2devnum(mdfd),
508 GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE);
509 if ((sra == NULL || active_disks >= info.array.working_disks)
510 && trustworthy != FOREIGN)
511 rv = ioctl(mdfd, RUN_ARRAY, NULL);
512 else
513 rv = sysfs_set_str(sra, NULL,
514 "array_state", "read-auto");
515 if (rv == 0) {
516 if (verbose >= 0)
517 fprintf(stderr, Name
518 ": %s attached to %s, which has been started.\n",
519 devname, chosen_name);
520 rv = 0;
521 wait_for(chosen_name, mdfd);
522 /* We just started the array, so some devices
523 * might have been evicted from the array
524 * because their event counts were too old.
525 * If the action=re-add policy is in-force for
526 * those devices we should re-add them now.
527 */
528 for (dsk = sra->devs; dsk ; dsk = dsk->next) {
529 if (disk_action_allows(dsk, st->ss->name, act_re_add) &&
530 add_disk(mdfd, st, sra, dsk) == 0)
531 fprintf(stderr, Name
532 ": %s re-added to %s\n",
533 dsk->sys_name, chosen_name);
534 }
535 } else {
536 fprintf(stderr, Name
537 ": %s attached to %s, but failed to start: %s.\n",
538 devname, chosen_name, strerror(errno));
539 rv = 1;
540 }
541 } else {
542 if (verbose >= 0)
543 fprintf(stderr, Name
544 ": %s attached to %s, not enough to start safely.\n",
545 devname, chosen_name);
546 rv = 0;
547 }
548 out:
549 if (dfd >= 0)
550 close(dfd);
551 if (mdfd >= 0)
552 close(mdfd);
553 if (policy)
554 dev_policy_free(policy);
555 return rv;
556 }
557
558 static struct mddev_ident *search_mdstat(struct supertype *st,
559 struct mdinfo *info,
560 char *devname,
561 int verbose, int *rvp)
562 {
563 struct mddev_ident *array_list, *match;
564 array_list = conf_get_ident(NULL);
565 match = NULL;
566 for (; array_list; array_list = array_list->next) {
567 if (array_list->uuid_set &&
568 same_uuid(array_list->uuid, info->uuid, st->ss->swapuuid)
569 == 0) {
570 if (verbose >= 2 && array_list->devname)
571 fprintf(stderr, Name
572 ": UUID differs from %s.\n",
573 array_list->devname);
574 continue;
575 }
576 if (array_list->name[0] &&
577 strcasecmp(array_list->name, info->name) != 0) {
578 if (verbose >= 2 && array_list->devname)
579 fprintf(stderr, Name
580 ": Name differs from %s.\n",
581 array_list->devname);
582 continue;
583 }
584 if (array_list->devices &&
585 !match_oneof(array_list->devices, devname)) {
586 if (verbose >= 2 && array_list->devname)
587 fprintf(stderr, Name
588 ": Not a listed device for %s.\n",
589 array_list->devname);
590 continue;
591 }
592 if (array_list->super_minor != UnSet &&
593 array_list->super_minor != info->array.md_minor) {
594 if (verbose >= 2 && array_list->devname)
595 fprintf(stderr, Name
596 ": Different super-minor to %s.\n",
597 array_list->devname);
598 continue;
599 }
600 if (!array_list->uuid_set &&
601 !array_list->name[0] &&
602 !array_list->devices &&
603 array_list->super_minor == UnSet) {
604 if (verbose >= 2 && array_list->devname)
605 fprintf(stderr, Name
606 ": %s doesn't have any identifying information.\n",
607 array_list->devname);
608 continue;
609 }
610 /* FIXME, should I check raid_disks and level too?? */
611
612 if (match) {
613 if (verbose >= 0) {
614 if (match->devname && array_list->devname)
615 fprintf(stderr, Name
616 ": we match both %s and %s - cannot decide which to use.\n",
617 match->devname, array_list->devname);
618 else
619 fprintf(stderr, Name
620 ": multiple lines in mdadm.conf match\n");
621 }
622 *rvp = 2;
623 match = NULL;
624 break;
625 }
626 match = array_list;
627 }
628 return match;
629 }
630
631 static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
632 int number, __u64 events, int verbose,
633 char *array_name)
634 {
635 /* Find a device attached to this array with a disk.number of number
636 * and events less than the passed events, and remove the device.
637 */
638 struct mdinfo *d;
639 mdu_array_info_t ra;
640
641 if (ioctl(mdfd, GET_ARRAY_INFO, &ra) == 0)
642 return; /* not safe to remove from active arrays
643 * without thinking more */
644
645 for (d = sra->devs; d ; d = d->next) {
646 char dn[10];
647 int dfd;
648 struct mdinfo info;
649 sprintf(dn, "%d:%d", d->disk.major, d->disk.minor);
650 dfd = dev_open(dn, O_RDONLY);
651 if (dfd < 0)
652 continue;
653 if (st->ss->load_super(st, dfd, NULL)) {
654 close(dfd);
655 continue;
656 }
657 st->ss->getinfo_super(st, &info, NULL);
658 st->ss->free_super(st);
659 close(dfd);
660
661 if (info.disk.number != number ||
662 info.events >= events)
663 continue;
664
665 if (d->disk.raid_disk > -1)
666 sysfs_set_str(sra, d, "slot", "none");
667 if (sysfs_set_str(sra, d, "state", "remove") == 0)
668 if (verbose >= 0)
669 fprintf(stderr, Name
670 ": removing old device %s from %s\n",
671 d->sys_name+4, array_name);
672 }
673 }
674
675 static int count_active(struct supertype *st, int mdfd, char **availp,
676 struct mdinfo *bestinfo)
677 {
678 /* count how many devices in sra think they are active */
679 struct mdinfo *d;
680 int cnt = 0, cnt1 = 0;
681 __u64 max_events = 0;
682 struct mdinfo *sra = sysfs_read(mdfd, -1, GET_DEVS | GET_STATE);
683 char *avail = NULL;
684
685 if (!sra)
686 return 0;
687
688 for (d = sra->devs ; d ; d = d->next) {
689 char dn[30];
690 int dfd;
691 int ok;
692 struct mdinfo info;
693
694 sprintf(dn, "%d:%d", d->disk.major, d->disk.minor);
695 dfd = dev_open(dn, O_RDONLY);
696 if (dfd < 0)
697 continue;
698 ok = st->ss->load_super(st, dfd, NULL);
699 close(dfd);
700 if (ok != 0)
701 continue;
702 st->ss->getinfo_super(st, &info, NULL);
703 if (!avail) {
704 avail = malloc(info.array.raid_disks);
705 if (!avail) {
706 fprintf(stderr, Name ": out of memory.\n");
707 exit(1);
708 }
709 memset(avail, 0, info.array.raid_disks);
710 *availp = avail;
711 }
712
713 if (info.disk.state & (1<<MD_DISK_SYNC))
714 {
715 if (cnt == 0) {
716 cnt++;
717 max_events = info.events;
718 avail[info.disk.raid_disk] = 2;
719 st->ss->getinfo_super(st, bestinfo, NULL);
720 } else if (info.events == max_events) {
721 cnt++;
722 avail[info.disk.raid_disk] = 2;
723 } else if (info.events == max_events-1) {
724 cnt1++;
725 avail[info.disk.raid_disk] = 1;
726 } else if (info.events < max_events - 1)
727 ;
728 else if (info.events == max_events+1) {
729 int i;
730 cnt1 = cnt;
731 cnt = 1;
732 max_events = info.events;
733 for (i=0; i<info.array.raid_disks; i++)
734 if (avail[i])
735 avail[i]--;
736 avail[info.disk.raid_disk] = 2;
737 st->ss->getinfo_super(st, bestinfo, NULL);
738 } else { /* info.events much bigger */
739 cnt = 1; cnt1 = 0;
740 memset(avail, 0, info.disk.raid_disk);
741 max_events = info.events;
742 avail[info.disk.raid_disk] = 2;
743 st->ss->getinfo_super(st, bestinfo, NULL);
744 }
745 }
746 st->ss->free_super(st);
747 }
748 return cnt + cnt1;
749 }
750
751 static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
752 struct supertype *st, int verbose)
753 {
754 /* This device doesn't have any md metadata
755 * If it is 'bare' and theh device policy allows 'spare' look for
756 * an array or container to attach it to.
757 * If st is set, then only arrays of that type are considered
758 * Return 0 on success, or some exit code on failure, probably 1.
759 */
760 int rv = -1;
761 struct stat stb;
762 struct map_ent *mp, *map = NULL;
763 struct mdinfo *chosen = NULL;
764 int dfd = *dfdp;
765
766 if (fstat(dfd, &stb) != 0)
767 return 1;
768
769 /*
770 * Now we need to find a suitable array to add this to.
771 * We only accept arrays that:
772 * - match 'st'
773 * - are in the same domains as the device
774 * - are of an size for which the device will be useful
775 * and we choose the one that is the most degraded
776 */
777
778 if (map_lock(&map)) {
779 fprintf(stderr, Name ": failed to get exclusive lock on "
780 "mapfile\n");
781 return 1;
782 }
783 for (mp = map ; mp ; mp = mp->next) {
784 struct supertype *st2;
785 struct domainlist *dl = NULL;
786 struct mdinfo *sra;
787 unsigned long long devsize;
788
789 if (is_subarray(mp->metadata))
790 continue;
791 if (st) {
792 st2 = st->ss->match_metadata_desc(mp->metadata);
793 if (!st2 ||
794 (st->minor_version >= 0 &&
795 st->minor_version != st2->minor_version)) {
796 if (verbose > 1)
797 fprintf(stderr, Name ": not adding %s to %s as metadata type doesn't match\n",
798 devname, mp->path);
799 free(st2);
800 continue;
801 }
802 free(st2);
803 }
804 sra = sysfs_read(-1, mp->devnum,
805 GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
806 GET_DEGRADED|GET_COMPONENT|GET_VERSION);
807 if (!sra) {
808 /* Probably a container - no degraded info */
809 sra = sysfs_read(-1, mp->devnum,
810 GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
811 GET_COMPONENT|GET_VERSION);
812 if (sra)
813 sra->array.failed_disks = 0;
814 }
815 if (!sra)
816 continue;
817 if (st == NULL) {
818 int i;
819 st2 = NULL;
820 for(i=0; !st2 && superlist[i]; i++)
821 st2 = superlist[i]->match_metadata_desc(
822 sra->text_version);
823 } else
824 st2 = st;
825 get_dev_size(dfd, NULL, &devsize);
826 if (st2->ss->avail_size(st2, devsize) < sra->component_size) {
827 if (verbose > 1)
828 fprintf(stderr, Name ": not adding %s to %s as it is too small\n",
829 devname, mp->path);
830 goto next;
831 }
832 dl = domain_from_array(sra, st2->ss->name);
833 if (!domain_test(dl, pol, st2->ss->name)) {
834 /* domain test fails */
835 if (verbose > 1)
836 fprintf(stderr, Name ": not adding %s to %s as it is not in a compatible domain\n",
837 devname, mp->path);
838
839 goto next;
840 }
841 /* all tests passed, OK to add to this array */
842 if (!chosen) {
843 chosen = sra;
844 sra = NULL;
845 } else if (chosen->array.failed_disks < sra->array.failed_disks) {
846 sysfs_free(chosen);
847 chosen = sra;
848 sra = NULL;
849 }
850 next:
851 if (sra)
852 sysfs_free(sra);
853 if (st != st2)
854 free(st2);
855 if (dl)
856 domain_free(dl);
857 }
858 if (chosen) {
859 /* add current device to chosen array as a spare */
860 int mdfd = open_dev(devname2devnum(chosen->sys_name));
861 if (mdfd >= 0) {
862 struct mddev_dev devlist;
863 char devname[20];
864 devlist.next = NULL;
865 devlist.used = 0;
866 devlist.re_add = 0;
867 devlist.writemostly = 0;
868 devlist.devname = devname;
869 sprintf(devname, "%d:%d", major(stb.st_rdev),
870 minor(stb.st_rdev));
871 devlist.disposition = 'a';
872 close(dfd);
873 *dfdp = -1;
874 rv = Manage_subdevs(chosen->sys_name, mdfd, &devlist,
875 -1, 0);
876 close(mdfd);
877 }
878 if (verbose > 0) {
879 if (rv == 0)
880 fprintf(stderr, Name ": added %s as spare for %s\n",
881 devname, chosen->sys_name);
882 else
883 fprintf(stderr, Name ": failed to add %s as spare for %s\n",
884 devname, chosen->sys_name);
885 }
886 sysfs_free(chosen);
887 }
888 return rv ? 0 : 1;
889 }
890
891 static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
892 struct supertype *st, int verbose)
893 {
894 /* we know that at least one partition virtual-metadata is
895 * allowed to incorporate spares like this device. We need to
896 * find a suitable device to copy partition information from.
897 *
898 * Getting a list of all disk (not partition) devices is
899 * slightly non-trivial. We could look at /sys/block, but
900 * that is theoretically due to be removed. Maybe best to use
901 * /dev/disk/by-path/?* and ignore names ending '-partNN' as
902 * we depend on this directory of 'path' info. But that fails
903 * to find loop devices and probably others. Maybe don't
904 * worry about that, they aren't the real target.
905 *
906 * So: check things in /dev/disk/by-path to see if they are in
907 * a compatible domain, then load the partition table and see
908 * if it is OK for the new device, and choose the largest
909 * partition table that fits.
910 */
911 DIR *dir;
912 struct dirent *de;
913 char *chosen = NULL;
914 unsigned long long chosen_size;
915 struct supertype *chosen_st = NULL;
916 int fd;
917
918 dir = opendir("/dev/disk/by-path");
919 if (!dir)
920 return 1;
921 while ((de = readdir(dir)) != NULL) {
922 char *ep;
923 struct dev_policy *pol2 = NULL;
924 struct domainlist *domlist = NULL;
925 int fd = -1;
926 struct mdinfo info;
927 struct supertype *st2 = NULL;
928 char *devname = NULL;
929 unsigned long long devsectors;
930
931 if (de->d_ino == 0 ||
932 de->d_name[0] == '.' ||
933 (de->d_type != DT_LNK && de->d_type != DT_UNKNOWN))
934 goto next;
935
936 ep = de->d_name + strlen(de->d_name);
937 while (ep > de->d_name &&
938 isdigit(ep[-1]))
939 ep--;
940 if (ep > de->d_name + 5 &&
941 strncmp(ep-5, "-part", 5) == 0)
942 /* This is a partition - skip it */
943 goto next;
944
945 pol2 = path_policy(de->d_name, type_disk);
946
947 domain_merge(&domlist, pol2, st ? st->ss->name : NULL);
948 if (domain_test(domlist, pol, st ? st->ss->name : NULL) == 0)
949 /* new device is incompatible with this device. */
950 goto next;
951
952 domain_free(domlist);
953 domlist = NULL;
954
955 asprintf(&devname, "/dev/disk/by-path/%s", de->d_name);
956 fd = open(devname, O_RDONLY);
957 if (fd < 0)
958 goto next;
959 if (get_dev_size(fd, devname, &devsectors) == 0)
960 goto next;
961 devsectors >>= 9;
962
963 if (st)
964 st2 = dup_super(st);
965 else
966 st2 = guess_super_type(fd, guess_partitions);
967 if (st2 == NULL ||
968 st2->ss->load_super(st2, fd, NULL) < 0)
969 goto next;
970
971 if (!st) {
972 /* Check domain policy again, this time referring to metadata */
973 domain_merge(&domlist, pol2, st2->ss->name);
974 if (domain_test(domlist, pol, st2->ss->name) == 0)
975 /* Incompatible devices for this metadata type */
976 goto next;
977 }
978
979 st2->ss->getinfo_super(st2, &info, NULL);
980 if (info.component_size > devsectors)
981 /* This partitioning doesn't fit in the device */
982 goto next;
983
984 /* This is an acceptable device to copy partition
985 * metadata from. We could just stop here, but I
986 * think I want to keep looking incase a larger
987 * metadata which makes better use of the device can
988 * be found.
989 */
990 if (chosen == NULL ||
991 chosen_size < info.component_size) {
992 chosen_size = info.component_size;
993 free(chosen);
994 chosen = devname;
995 devname = NULL;
996 if (chosen_st) {
997 chosen_st->ss->free_super(chosen_st);
998 free(chosen_st);
999 }
1000 chosen_st = st2;
1001 st2 = NULL;
1002 }
1003
1004 next:
1005 free(devname);
1006 domain_free(domlist);
1007 dev_policy_free(pol2);
1008 if (st2)
1009 st2->ss->free_super(st2);
1010 free(st2);
1011
1012 if (fd >= 0)
1013 close(fd);
1014 }
1015
1016 if (!chosen)
1017 return 1;
1018
1019 /* 'chosen' is the best device we can find. Let's write its
1020 * metadata to devname dfd is read-only so don't use that
1021 */
1022 fd = open(devname, O_RDWR);
1023 if (fd >= 0) {
1024 chosen_st->ss->store_super(chosen_st, fd);
1025 close(fd);
1026 }
1027 free(chosen);
1028 chosen_st->ss->free_super(chosen_st);
1029 free(chosen_st);
1030 return 0;
1031 }
1032
1033
1034 /* adding a spare to a regular array is quite different from adding one to
1035 * a set-of-partitions virtual array.
1036 * This function determines which is worth trying and tries as appropriate.
1037 * Arrays are given priority over partitions.
1038 */
1039 static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
1040 struct supertype *st, int verbose)
1041 {
1042 int i;
1043 int rv;
1044 int arrays_ok = 0;
1045 int partitions_ok = 0;
1046 char bufpad[4096 + 4096];
1047 char *buf = (char*)(((long)bufpad + 4096) & ~4095);
1048 int dfd = *dfdp;
1049
1050 /* Can only add a spare if device has at least one domains */
1051 if (pol_find(pol, pol_domain) == NULL)
1052 return 1;
1053 /* And only if some action allows spares */
1054 if (!policy_action_allows(pol, st?st->ss->name:NULL, act_spare))
1055 return 1;
1056
1057 /* Now check if the device is bare - we don't add non-bare devices
1058 * yet even if action=-spare
1059 */
1060
1061 if (lseek(dfd, 0, SEEK_SET) != 0 ||
1062 read(dfd, buf, 4096) != 4096) {
1063 not_bare:
1064 if (verbose > 1)
1065 fprintf(stderr, Name ": %s is not bare, so not considering as a spare\n",
1066 devname);
1067 return 1;
1068 }
1069 if (buf[0] != '\0' && buf[0] != '\x5a' && buf[0] != '\xff')
1070 goto not_bare;
1071 if (memcmp(buf, buf+1, 4095) != 0)
1072 goto not_bare;
1073
1074 /* OK, first 4K appear blank, try the end. */
1075 if (lseek(dfd, -4096, SEEK_END) < 0 ||
1076 read(dfd, buf, 4096) != 4096)
1077 goto not_bare;
1078
1079 if (buf[0] != '\0' && buf[0] != '\x5a' && buf[0] != '\xff')
1080 goto not_bare;
1081 if (memcmp(buf, buf+1, 4095) != 0)
1082 goto not_bare;
1083
1084 /* This device passes our test for 'is bare'.
1085 * Let's see what policy allows for such things.
1086 */
1087 if (st) {
1088 /* just try try 'array' or 'partition' based on this metadata */
1089 if (st->ss->add_to_super)
1090 return array_try_spare(devname, dfdp, pol,
1091 st, verbose);
1092 else
1093 return partition_try_spare(devname, dfdp, pol,
1094 st, verbose);
1095 }
1096 /* Now see which metadata type support spare */
1097 for (i = 0; (!arrays_ok || !partitions_ok) && superlist[i] ; i++) {
1098 if (superlist[i]->add_to_super && !arrays_ok &&
1099 policy_action_allows(pol, superlist[i]->name, act_spare))
1100 arrays_ok = 1;
1101 if (superlist[i]->add_to_super == NULL && !partitions_ok &&
1102 policy_action_allows(pol, superlist[i]->name, act_spare))
1103 partitions_ok = 1;
1104 }
1105 rv = 0;
1106 if (arrays_ok)
1107 rv = array_try_spare(devname, dfdp, pol, st, verbose);
1108 if (rv == 0 && partitions_ok)
1109 rv = partition_try_spare(devname, dfdp, pol, st, verbose);
1110 return rv;
1111 }
1112
1113 int IncrementalScan(int verbose)
1114 {
1115 /* look at every device listed in the 'map' file.
1116 * If one is found that is not running then:
1117 * look in mdadm.conf for bitmap file.
1118 * if one exists, but array has none, add it.
1119 * try to start array in auto-readonly mode
1120 */
1121 struct map_ent *mapl = NULL;
1122 struct map_ent *me;
1123 struct mddev_ident *devs, *mddev;
1124 int rv = 0;
1125
1126 map_read(&mapl);
1127 devs = conf_get_ident(NULL);
1128
1129 for (me = mapl ; me ; me = me->next) {
1130 mdu_array_info_t array;
1131 mdu_bitmap_file_t bmf;
1132 struct mdinfo *sra;
1133 int mdfd = open_dev(me->devnum);
1134
1135 if (mdfd < 0)
1136 continue;
1137 if (ioctl(mdfd, GET_ARRAY_INFO, &array) == 0 ||
1138 errno != ENODEV) {
1139 close(mdfd);
1140 continue;
1141 }
1142 /* Ok, we can try this one. Maybe it needs a bitmap */
1143 for (mddev = devs ; mddev ; mddev = mddev->next)
1144 if (mddev->devname && me->path
1145 && devname_matches(mddev->devname, me->path))
1146 break;
1147 if (mddev && mddev->bitmap_file) {
1148 /*
1149 * Note: early kernels will wrongly fail this, so it
1150 * is a hint only
1151 */
1152 int added = -1;
1153 if (ioctl(mdfd, GET_ARRAY_INFO, &bmf) < 0) {
1154 int bmfd = open(mddev->bitmap_file, O_RDWR);
1155 if (bmfd >= 0) {
1156 added = ioctl(mdfd, SET_BITMAP_FILE,
1157 bmfd);
1158 close(bmfd);
1159 }
1160 }
1161 if (verbose >= 0) {
1162 if (added == 0)
1163 fprintf(stderr, Name
1164 ": Added bitmap %s to %s\n",
1165 mddev->bitmap_file, me->path);
1166 else if (errno != EEXIST)
1167 fprintf(stderr, Name
1168 ": Failed to add bitmap to %s: %s\n",
1169 me->path, strerror(errno));
1170 }
1171 }
1172 sra = sysfs_read(mdfd, 0, 0);
1173 if (sra) {
1174 if (sysfs_set_str(sra, NULL,
1175 "array_state", "read-auto") == 0) {
1176 if (verbose >= 0)
1177 fprintf(stderr, Name
1178 ": started array %s\n",
1179 me->path ?: devnum2devname(me->devnum));
1180 } else {
1181 fprintf(stderr, Name
1182 ": failed to start array %s: %s\n",
1183 me->path ?: devnum2devname(me->devnum),
1184 strerror(errno));
1185 rv = 1;
1186 }
1187 }
1188 }
1189 return rv;
1190 }
1191
1192 static char *container2devname(char *devname)
1193 {
1194 char *mdname = NULL;
1195
1196 if (devname[0] == '/') {
1197 int fd = open(devname, O_RDONLY);
1198 if (fd >= 0) {
1199 mdname = devnum2devname(fd2devnum(fd));
1200 close(fd);
1201 }
1202 } else {
1203 int uuid[4];
1204 struct map_ent *mp, *map = NULL;
1205
1206 if (!parse_uuid(devname, uuid))
1207 return mdname;
1208 mp = map_by_uuid(&map, uuid);
1209 if (mp)
1210 mdname = devnum2devname(mp->devnum);
1211 map_free(map);
1212 }
1213
1214 return mdname;
1215 }
1216
1217 static int Incremental_container(struct supertype *st, char *devname,
1218 char *homehost, int verbose,
1219 int runstop, int autof)
1220 {
1221 /* Collect the contents of this container and for each
1222 * array, choose a device name and assemble the array.
1223 */
1224
1225 struct mdinfo *list;
1226 struct mdinfo *ra;
1227 struct map_ent *map = NULL;
1228 struct mdinfo info;
1229 int trustworthy;
1230 struct mddev_ident *match;
1231 int rv = 0;
1232
1233 memset(&info, 0, sizeof(info));
1234 st->ss->getinfo_super(st, &info, NULL);
1235
1236 if ((runstop > 0 && info.container_enough >= 0) ||
1237 info.container_enough > 0)
1238 /* pass */;
1239 else {
1240 if (verbose)
1241 fprintf(stderr, Name ": not enough devices to start the container\n");
1242 return 0;
1243 }
1244
1245 match = search_mdstat(st, &info, devname, verbose, &rv);
1246 if (match == NULL && rv == 2)
1247 return rv;
1248
1249 /* Need to compute 'trustworthy' */
1250 if (match)
1251 trustworthy = LOCAL;
1252 else if (st->ss->match_home(st, homehost) == 1)
1253 trustworthy = LOCAL;
1254 else if (st->ss->match_home(st, "any") == 1)
1255 trustworthy = LOCAL;
1256 else
1257 trustworthy = FOREIGN;
1258
1259 list = st->ss->container_content(st, NULL);
1260 if (map_lock(&map))
1261 fprintf(stderr, Name ": failed to get exclusive lock on "
1262 "mapfile\n");
1263
1264 for (ra = list ; ra ; ra = ra->next) {
1265 int mdfd;
1266 char chosen_name[1024];
1267 struct map_ent *mp;
1268 struct mddev_ident *match = NULL;
1269
1270 mp = map_by_uuid(&map, ra->uuid);
1271
1272 if (mp) {
1273 mdfd = open_dev(mp->devnum);
1274 if (mp->path)
1275 strcpy(chosen_name, mp->path);
1276 else
1277 strcpy(chosen_name, devnum2devname(mp->devnum));
1278 } else {
1279
1280 /* Check in mdadm.conf for container == devname and
1281 * member == ra->text_version after second slash.
1282 */
1283 char *sub = strchr(ra->text_version+1, '/');
1284 struct mddev_ident *array_list;
1285 if (sub) {
1286 sub++;
1287 array_list = conf_get_ident(NULL);
1288 } else
1289 array_list = NULL;
1290 for(; array_list ; array_list = array_list->next) {
1291 char *dn;
1292 if (array_list->member == NULL ||
1293 array_list->container == NULL)
1294 continue;
1295 if (strcmp(array_list->member, sub) != 0)
1296 continue;
1297 if (array_list->uuid_set &&
1298 !same_uuid(ra->uuid, array_list->uuid, st->ss->swapuuid))
1299 continue;
1300 dn = container2devname(array_list->container);
1301 if (dn == NULL)
1302 continue;
1303 if (strncmp(dn, ra->text_version+1,
1304 strlen(dn)) != 0 ||
1305 ra->text_version[strlen(dn)+1] != '/') {
1306 free(dn);
1307 continue;
1308 }
1309 free(dn);
1310 /* we have a match */
1311 match = array_list;
1312 if (verbose>0)
1313 fprintf(stderr, Name ": match found for member %s\n",
1314 array_list->member);
1315 break;
1316 }
1317
1318 if (match && match->devname &&
1319 strcasecmp(match->devname, "<ignore>") == 0) {
1320 if (verbose > 0)
1321 fprintf(stderr, Name ": array %s/%s is "
1322 "explicitly ignored by mdadm.conf\n",
1323 match->container, match->member);
1324 return 2;
1325 }
1326 if (match)
1327 trustworthy = LOCAL;
1328
1329 mdfd = create_mddev(match ? match->devname : NULL,
1330 ra->name,
1331 autof,
1332 trustworthy,
1333 chosen_name);
1334 }
1335
1336 if (mdfd < 0) {
1337 fprintf(stderr, Name ": failed to open %s: %s.\n",
1338 chosen_name, strerror(errno));
1339 return 2;
1340 }
1341
1342 assemble_container_content(st, mdfd, ra, runstop,
1343 chosen_name, verbose);
1344 }
1345 map_unlock(&map);
1346 return 0;
1347 }
1348
1349 /*
1350 * IncrementalRemove - Attempt to see if the passed in device belongs to any
1351 * raid arrays, and if so first fail (if needed) and then remove the device.
1352 *
1353 * @devname - The device we want to remove
1354 *
1355 * Note: the device name must be a kernel name like "sda", so
1356 * that we can find it in /proc/mdstat
1357 */
1358 int IncrementalRemove(char *devname, int verbose)
1359 {
1360 int mdfd;
1361 int rv;
1362 struct mdstat_ent *ent;
1363 struct mddev_dev devlist;
1364
1365 if (strchr(devname, '/')) {
1366 fprintf(stderr, Name ": incremental removal requires a "
1367 "kernel device name, not a file: %s\n", devname);
1368 return 1;
1369 }
1370 ent = mdstat_by_component(devname);
1371 if (!ent) {
1372 fprintf(stderr, Name ": %s does not appear to be a component "
1373 "of any array\n", devname);
1374 return 1;
1375 }
1376 mdfd = open_dev(ent->devnum);
1377 if (mdfd < 0) {
1378 fprintf(stderr, Name ": Cannot open array %s!!\n", ent->dev);
1379 return 1;
1380 }
1381 memset(&devlist, 0, sizeof(devlist));
1382 devlist.devname = devname;
1383 devlist.disposition = 'f';
1384 Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0);
1385 devlist.disposition = 'r';
1386 rv = Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0);
1387 close(mdfd);
1388 return rv;
1389 }