]> git.ipfire.org Git - thirdparty/mdadm.git/blob - Monitor.c
Monitor: array that has disappeared doesn't need spares
[thirdparty/mdadm.git] / Monitor.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@suse.de>
23 */
24
25 #include "mdadm.h"
26 #include "md_p.h"
27 #include "md_u.h"
28 #include <sys/wait.h>
29 #include <signal.h>
30 #include <limits.h>
31 #include <syslog.h>
32
/* The largest number of disks current arrays can manage is 384.
 * This really should be dynamic, but that will have to wait.
 * At least it isn't MD_SB_DISKS.
 */
#define MaxDisks 384

/* Per-array monitoring record; the records form a singly linked list. */
struct state {
	char *devname;		/* path that is open()ed to examine the array */
	int devnum;		/* to sync with mdstat info */
	long utime;		/* update time last recorded; 0 marks a new array */
	int err;		/* non-zero when the array could not be examined
				 * on the last check (or was not checked yet) */
	char *spare_group;	/* spare-group name, or NULL */
	int active;		/* disk counts copied from the array info */
	int working;
	int failed;
	int spare;
	int raid;
	int expected_spares;	/* spare count to expect, -1 when not configured */
	int devstate[MaxDisks];	/* last recorded state of each slot */
	dev_t devid[MaxDisks];	/* last recorded device number of each slot */
	int percent;		/* progress value last copied from mdstat */
	int parent_dev;		/* For subarray, devnum of parent.
				 * For others, NoMdDev
				 */
	struct supertype *metadata;
	struct state *subarray;	/* for a container it is a link to first subarray
				 * for a subarray it is a link to next subarray
				 * in the same container */
	struct state *parent;	/* for a subarray it is a link to its container
				 */
	struct state *next;	/* next record in the list */
};
60
/* Where and how alerts are delivered; filled in once by Monitor(). */
struct alert_info {
	char *mailaddr;		/* mail recipient, or NULL for no mail */
	char *mailfrom;		/* value for the From: header, or NULL for the default */
	char *alert_cmd;	/* program to exec for every event, or NULL */
	int dosyslog;		/* non-zero: also report events through syslog */
};
/* Forward declarations of the file-local helpers used by Monitor(). */

/* fork into the background, recording the child's pid */
static int make_daemon(char *pidfile);

/* look for another autorebuild process via its pid file */
static int check_one_sharer(int scan);

/* deliver one event through every configured channel */
static void alert(char *event, char *dev, char *disc,
		  struct alert_info *info);

/* refresh one state record; returns 1 if the array is degraded */
static int check_array(struct state *st, struct mdstat_ent *mdstat,
		       int test, struct alert_info *info,
		       int increments);

/* create state records for mdstat entries that nothing claimed */
static int add_new_arrays(struct mdstat_ent *mdstat,
			  struct state *statelist,
			  int test, struct alert_info *info);

/* try to give a spare to each degraded array that has none */
static void try_spare_migration(struct state *statelist,
				struct alert_info *info);

/* rebuild the parent/subarray links between state records */
static void link_containers_with_subarrays(struct state *list);
77
78 int Monitor(struct mddev_dev *devlist,
79 char *mailaddr, char *alert_cmd,
80 int period, int daemonise, int scan, int oneshot,
81 int dosyslog, int test, char *pidfile, int increments,
82 int share)
83 {
84 /*
85 * Every few seconds, scan every md device looking for changes
86 * When a change is found, log it, possibly run the alert command,
87 * and possibly send Email
88 *
89 * For each array, we record:
90 * Update time
91 * active/working/failed/spare drives
92 * State of each device.
93 * %rebuilt if rebuilding
94 *
95 * If the update time changes, check out all the data again
96 * It is possible that we cannot get the state of each device
97 * due to bugs in the md kernel module.
98 * We also read /proc/mdstat to get rebuild percent,
99 * and to get state on all active devices incase of kernel bug.
100 *
101 * Events are:
102 * Fail
103 * An active device had Faulty set or Active/Sync removed
104 * FailSpare
105 * A spare device had Faulty set
106 * SpareActive
107 * An active device had a reverse transition
108 * RebuildStarted
109 * percent went from -1 to +ve
110 * RebuildNN
111 * percent went from below to not-below NN%
112 * DeviceDisappeared
113 * Couldn't access a device which was previously visible
114 *
115 * if we detect an array with active<raid and spare==0
116 * we look at other arrays that have same spare-group
117 * If we find one with active==raid and spare>0,
118 * and if we can get_disk_info and find a name
119 * Then we hot-remove and hot-add to the other array
120 *
121 * If devlist is NULL, then we can monitor everything because --scan
122 * was given. We get an initial list from config file and add anything
123 * that appears in /proc/mdstat
124 */
125
126 struct state *statelist = NULL;
127 int finished = 0;
128 struct mdstat_ent *mdstat = NULL;
129 char *mailfrom = NULL;
130 struct alert_info info;
131
132 if (!mailaddr) {
133 mailaddr = conf_get_mailaddr();
134 if (mailaddr && ! scan)
135 fprintf(stderr, Name ": Monitor using email address \"%s\" from config file\n",
136 mailaddr);
137 }
138 mailfrom = conf_get_mailfrom();
139
140 if (!alert_cmd) {
141 alert_cmd = conf_get_program();
142 if (alert_cmd && ! scan)
143 fprintf(stderr, Name ": Monitor using program \"%s\" from config file\n",
144 alert_cmd);
145 }
146 if (scan && !mailaddr && !alert_cmd) {
147 fprintf(stderr, Name ": No mail address or alert command - not monitoring.\n");
148 return 1;
149 }
150 info.alert_cmd = alert_cmd;
151 info.mailaddr = mailaddr;
152 info.mailfrom = mailfrom;
153 info.dosyslog = dosyslog;
154
155 if (daemonise)
156 if (make_daemon(pidfile))
157 return 1;
158
159 if (share)
160 if (check_one_sharer(scan))
161 return 1;
162
163 if (devlist == NULL) {
164 struct mddev_ident *mdlist = conf_get_ident(NULL);
165 for (; mdlist; mdlist=mdlist->next) {
166 struct state *st;
167 if (mdlist->devname == NULL)
168 continue;
169 if (strcasecmp(mdlist->devname, "<ignore>") == 0)
170 continue;
171 st = calloc(1, sizeof *st);
172 if (st == NULL)
173 continue;
174 if (mdlist->devname[0] == '/')
175 st->devname = strdup(mdlist->devname);
176 else {
177 st->devname = malloc(8+strlen(mdlist->devname)+1);
178 strcpy(strcpy(st->devname, "/dev/md/"),
179 mdlist->devname);
180 }
181 st->next = statelist;
182 st->devnum = INT_MAX;
183 st->percent = -2;
184 st->expected_spares = mdlist->spare_disks;
185 if (mdlist->spare_group)
186 st->spare_group = strdup(mdlist->spare_group);
187 statelist = st;
188 }
189 } else {
190 struct mddev_dev *dv;
191 for (dv=devlist ; dv; dv=dv->next) {
192 struct mddev_ident *mdlist = conf_get_ident(dv->devname);
193 struct state *st = calloc(1, sizeof *st);
194 if (st == NULL)
195 continue;
196 st->devname = strdup(dv->devname);
197 st->next = statelist;
198 st->devnum = INT_MAX;
199 st->percent = -2;
200 st->expected_spares = -1;
201 if (mdlist) {
202 st->expected_spares = mdlist->spare_disks;
203 if (mdlist->spare_group)
204 st->spare_group = strdup(mdlist->spare_group);
205 }
206 statelist = st;
207 }
208 }
209
210
211 while (! finished) {
212 int new_found = 0;
213 struct state *st;
214 int anydegraded = 0;
215
216 if (mdstat)
217 free_mdstat(mdstat);
218 mdstat = mdstat_read(oneshot?0:1, 0);
219
220 for (st=statelist; st; st=st->next)
221 if (check_array(st, mdstat, test, &info, increments))
222 anydegraded = 1;
223
224 /* now check if there are any new devices found in mdstat */
225 if (scan)
226 new_found = add_new_arrays(mdstat, statelist, test,
227 &info);
228
229 /* If an array has active < raid && spare == 0 && spare_group != NULL
230 * Look for another array with spare > 0 and active == raid and same spare_group
231 * if found, choose a device and hotremove/hotadd
232 */
233 if (share && anydegraded)
234 try_spare_migration(statelist, &info);
235 if (!new_found) {
236 if (oneshot)
237 break;
238 else
239 mdstat_wait(period);
240 }
241 test = 0;
242 }
243 if (pidfile)
244 unlink(pidfile);
245 return 0;
246 }
247
/*
 * Fork into the background.
 *
 * The parent records the child's pid - on stdout when pidfile is NULL,
 * otherwise in pidfile - and then exits with status 0, so that only the
 * child goes on to monitor.  (Returning 0 to the caller in both processes
 * would leave the parent monitoring in the foreground as well.)
 *
 * In the child, stdin, stdout and stderr are pointed at /dev/null and a
 * new session is started.
 *
 * Returns 0 in the child, 1 if fork() failed.
 */
static int make_daemon(char *pidfile)
{
	int pid = fork();

	if (pid < 0) {
		perror("daemonise");
		return 1;
	}
	if (pid > 0) {
		if (!pidfile)
			printf("%d\n", pid);
		else {
			FILE *pid_file;
			pid_file=fopen(pidfile, "w");
			if (!pid_file)
				perror("cannot create pid file");
			else {
				fprintf(pid_file,"%d\n", pid);
				fclose(pid_file);
			}
		}
		/* exit() rather than _exit() so the pid written to stdout
		 * is flushed
		 */
		exit(0);
	}
	close(0);
	/* fd 0 was just closed, so a successful open() reuses it */
	if (open("/dev/null", O_RDWR) < 0)
		perror("daemonise: cannot open /dev/null");
	else {
		dup2(0,1);
		dup2(0,2);
	}
	setsid();
	return 0;
}
277
278 static int check_one_sharer(int scan)
279 {
280 int pid, rv;
281 FILE *fp;
282 char dir[20];
283 struct stat buf;
284 fp = fopen("/var/run/mdadm/autorebuild.pid", "r");
285 if (fp) {
286 fscanf(fp, "%d", &pid);
287 sprintf(dir, "/proc/%d", pid);
288 rv = stat(dir, &buf);
289 if (rv != -1) {
290 if (scan) {
291 fprintf(stderr, Name ": Only one "
292 "autorebuild process allowed"
293 " in scan mode, aborting\n");
294 fclose(fp);
295 return 1;
296 } else {
297 fprintf(stderr, Name ": Warning: One"
298 " autorebuild process already"
299 " running.\n");
300 }
301 }
302 fclose(fp);
303 }
304 if (scan) {
305 if (mkdir("/var/run/mdadm", S_IRWXU) < 0 &&
306 errno != EEXIST) {
307 fprintf(stderr, Name ": Can't create "
308 "autorebuild.pid file\n");
309 } else {
310 fp = fopen("/var/run/mdadm/autorebuild.pid", "w");
311 if (!fp)
312 fprintf(stderr, Name ": Cannot create"
313 " autorebuild.pid"
314 "file\n");
315 else {
316 pid = getpid();
317 fprintf(fp, "%d\n", pid);
318 fclose(fp);
319 }
320 }
321 }
322 return 0;
323 }
324
325 static void alert(char *event, char *dev, char *disc, struct alert_info *info)
326 {
327 int priority;
328
329 if (!info->alert_cmd && !info->mailaddr) {
330 time_t now = time(0);
331
332 printf("%1.15s: %s on %s %s\n", ctime(&now)+4, event, dev, disc?disc:"unknown device");
333 }
334 if (info->alert_cmd) {
335 int pid = fork();
336 switch(pid) {
337 default:
338 waitpid(pid, NULL, 0);
339 break;
340 case -1:
341 break;
342 case 0:
343 execl(info->alert_cmd, info->alert_cmd,
344 event, dev, disc, NULL);
345 exit(2);
346 }
347 }
348 if (info->mailaddr &&
349 (strncmp(event, "Fail", 4)==0 ||
350 strncmp(event, "Test", 4)==0 ||
351 strncmp(event, "Spares", 6)==0 ||
352 strncmp(event, "Degrade", 7)==0)) {
353 FILE *mp = popen(Sendmail, "w");
354 if (mp) {
355 FILE *mdstat;
356 char hname[256];
357 gethostname(hname, sizeof(hname));
358 signal(SIGPIPE, SIG_IGN);
359 if (info->mailfrom)
360 fprintf(mp, "From: %s\n", info->mailfrom);
361 else
362 fprintf(mp, "From: " Name " monitoring <root>\n");
363 fprintf(mp, "To: %s\n", info->mailaddr);
364 fprintf(mp, "Subject: %s event on %s:%s\n\n",
365 event, dev, hname);
366
367 fprintf(mp,
368 "This is an automatically generated"
369 " mail message from " Name "\n");
370 fprintf(mp, "running on %s\n\n", hname);
371
372 fprintf(mp,
373 "A %s event had been detected on"
374 " md device %s.\n\n", event, dev);
375
376 if (disc && disc[0] != ' ')
377 fprintf(mp,
378 "It could be related to"
379 " component device %s.\n\n", disc);
380 if (disc && disc[0] == ' ')
381 fprintf(mp, "Extra information:%s.\n\n", disc);
382
383 fprintf(mp, "Faithfully yours, etc.\n");
384
385 mdstat = fopen("/proc/mdstat", "r");
386 if (mdstat) {
387 char buf[8192];
388 int n;
389 fprintf(mp,
390 "\nP.S. The /proc/mdstat file"
391 " currently contains the following:\n\n");
392 while ( (n=fread(buf, 1, sizeof(buf), mdstat)) > 0)
393 n=fwrite(buf, 1, n, mp);
394 fclose(mdstat);
395 }
396 pclose(mp);
397 }
398 }
399
400 /* log the event to syslog maybe */
401 if (info->dosyslog) {
402 /* Log at a different severity depending on the event.
403 *
404 * These are the critical events: */
405 if (strncmp(event, "Fail", 4)==0 ||
406 strncmp(event, "Degrade", 7)==0 ||
407 strncmp(event, "DeviceDisappeared", 17)==0)
408 priority = LOG_CRIT;
409 /* Good to know about, but are not failures: */
410 else if (strncmp(event, "Rebuild", 7)==0 ||
411 strncmp(event, "MoveSpare", 9)==0 ||
412 strncmp(event, "Spares", 6) != 0)
413 priority = LOG_WARNING;
414 /* Everything else: */
415 else
416 priority = LOG_INFO;
417
418 if (disc)
419 syslog(priority,
420 "%s event detected on md device %s,"
421 " component device %s", event, dev, disc);
422 else
423 syslog(priority,
424 "%s event detected on md device %s",
425 event, dev);
426 }
427 }
428
429 static int check_array(struct state *st, struct mdstat_ent *mdstat,
430 int test, struct alert_info *ainfo,
431 int increments)
432 {
433 /* Update the state 'st' to reflect any changes shown in mdstat,
434 * or found by directly examining the array, and return
435 * '1' if the array is degraded, or '0' if it is optimal (or dead).
436 */
437 struct { int state, major, minor; } info[MaxDisks];
438 mdu_array_info_t array;
439 struct mdstat_ent *mse = NULL, *mse2;
440 char *dev = st->devname;
441 int fd;
442 int i;
443
444 if (test)
445 alert("TestMessage", dev, NULL, ainfo);
446 fd = open(dev, O_RDONLY);
447 if (fd < 0) {
448 if (!st->err)
449 alert("DeviceDisappeared", dev, NULL, ainfo);
450 st->err=1;
451 return 0;
452 }
453 fcntl(fd, F_SETFD, FD_CLOEXEC);
454 if (ioctl(fd, GET_ARRAY_INFO, &array)<0) {
455 if (!st->err)
456 alert("DeviceDisappeared", dev, NULL, ainfo);
457 st->err=1;
458 close(fd);
459 return 0;
460 }
461 /* It's much easier to list what array levels can't
462 * have a device disappear than all of them that can
463 */
464 if (array.level == 0 || array.level == -1) {
465 if (!st->err)
466 alert("DeviceDisappeared", dev, "Wrong-Level", ainfo);
467 st->err = 1;
468 close(fd);
469 return 0;
470 }
471 if (st->devnum == INT_MAX) {
472 struct stat stb;
473 if (fstat(fd, &stb) == 0 &&
474 (S_IFMT&stb.st_mode)==S_IFBLK) {
475 if (major(stb.st_rdev) == MD_MAJOR)
476 st->devnum = minor(stb.st_rdev);
477 else
478 st->devnum = -1- (minor(stb.st_rdev)>>6);
479 }
480 }
481
482 for (mse2 = mdstat ; mse2 ; mse2=mse2->next)
483 if (mse2->devnum == st->devnum) {
484 mse2->devnum = INT_MAX; /* flag it as "used" */
485 mse = mse2;
486 }
487
488 if (!mse) {
489 /* duplicated array in statelist
490 * or re-created after reading mdstat*/
491 st->err = 1;
492 close(fd);
493 return 0;
494 }
495 /* this array is in /proc/mdstat */
496 if (array.utime == 0)
497 /* external arrays don't update utime, so
498 * just make sure it is always different. */
499 array.utime = st->utime + 1;;
500
501 if (st->utime == array.utime &&
502 st->failed == array.failed_disks &&
503 st->working == array.working_disks &&
504 st->spare == array.spare_disks &&
505 (mse == NULL || (
506 mse->percent == st->percent
507 ))) {
508 close(fd);
509 st->err = 0;
510 if ((st->active < st->raid) && st->spare == 0)
511 return 1;
512 else
513 return 0;
514 }
515 if (st->utime == 0 && /* new array */
516 mse->pattern && strchr(mse->pattern, '_') /* degraded */
517 )
518 alert("DegradedArray", dev, NULL, ainfo);
519
520 if (st->utime == 0 && /* new array */
521 st->expected_spares > 0 &&
522 array.spare_disks < st->expected_spares)
523 alert("SparesMissing", dev, NULL, ainfo);
524 if (st->percent == -1 &&
525 mse->percent >= 0)
526 alert("RebuildStarted", dev, NULL, ainfo);
527 if (st->percent >= 0 &&
528 mse->percent >= 0 &&
529 (mse->percent / increments) > (st->percent / increments)) {
530 char percentalert[15]; // "RebuildNN" (10 chars) or "RebuildStarted" (15 chars)
531
532 if((mse->percent / increments) == 0)
533 snprintf(percentalert, sizeof(percentalert), "RebuildStarted");
534 else
535 snprintf(percentalert, sizeof(percentalert), "Rebuild%02d", mse->percent);
536
537 alert(percentalert, dev, NULL, ainfo);
538 }
539
540 if (mse->percent == -1 &&
541 st->percent >= 0) {
542 /* Rebuild/sync/whatever just finished.
543 * If there is a number in /mismatch_cnt,
544 * we should report that.
545 */
546 struct mdinfo *sra =
547 sysfs_read(-1, st->devnum, GET_MISMATCH);
548 if (sra && sra->mismatch_cnt > 0) {
549 char cnt[40];
550 sprintf(cnt, " mismatches found: %d", sra->mismatch_cnt);
551 alert("RebuildFinished", dev, cnt, ainfo);
552 } else
553 alert("RebuildFinished", dev, NULL, ainfo);
554 if (sra)
555 free(sra);
556 }
557 st->percent = mse->percent;
558
559 for (i=0; i<MaxDisks && i <= array.raid_disks + array.nr_disks;
560 i++) {
561 mdu_disk_info_t disc;
562 disc.number = i;
563 if (ioctl(fd, GET_DISK_INFO, &disc) >= 0) {
564 info[i].state = disc.state;
565 info[i].major = disc.major;
566 info[i].minor = disc.minor;
567 } else
568 info[i].major = info[i].minor = 0;
569 }
570
571 if (strncmp(mse->metadata_version, "external:", 9) == 0 &&
572 is_subarray(mse->metadata_version+9))
573 st->parent_dev =
574 devname2devnum(mse->metadata_version+10);
575 else
576 st->parent_dev = NoMdDev;
577 if (st->metadata == NULL &&
578 st->parent_dev == NoMdDev)
579 st->metadata = super_by_fd(fd, NULL);
580
581 close(fd);
582
583 for (i=0; i<MaxDisks; i++) {
584 mdu_disk_info_t disc = {0,0,0,0,0};
585 int newstate=0;
586 int change;
587 char *dv = NULL;
588 disc.number = i;
589 if (i > array.raid_disks + array.nr_disks) {
590 newstate = 0;
591 disc.major = disc.minor = 0;
592 } else if (info[i].major || info[i].minor) {
593 newstate = info[i].state;
594 dv = map_dev(info[i].major, info[i].minor, 1);
595 disc.state = newstate;
596 disc.major = info[i].major;
597 disc.minor = info[i].minor;
598 } else if (mse && mse->pattern && i < (int)strlen(mse->pattern)) {
599 switch(mse->pattern[i]) {
600 case 'U': newstate = 6 /* ACTIVE/SYNC */; break;
601 case '_': newstate = 0; break;
602 }
603 disc.major = disc.minor = 0;
604 }
605 if (dv == NULL && st->devid[i])
606 dv = map_dev(major(st->devid[i]),
607 minor(st->devid[i]), 1);
608 change = newstate ^ st->devstate[i];
609 if (st->utime && change && !st->err) {
610 if (i < array.raid_disks &&
611 (((newstate&change)&(1<<MD_DISK_FAULTY)) ||
612 ((st->devstate[i]&change)&(1<<MD_DISK_ACTIVE)) ||
613 ((st->devstate[i]&change)&(1<<MD_DISK_SYNC)))
614 )
615 alert("Fail", dev, dv, ainfo);
616 else if (i >= array.raid_disks &&
617 (disc.major || disc.minor) &&
618 st->devid[i] == makedev(disc.major, disc.minor) &&
619 ((newstate&change)&(1<<MD_DISK_FAULTY))
620 )
621 alert("FailSpare", dev, dv, ainfo);
622 else if (i < array.raid_disks &&
623 ! (newstate & (1<<MD_DISK_REMOVED)) &&
624 (((st->devstate[i]&change)&(1<<MD_DISK_FAULTY)) ||
625 ((newstate&change)&(1<<MD_DISK_ACTIVE)) ||
626 ((newstate&change)&(1<<MD_DISK_SYNC)))
627 )
628 alert("SpareActive", dev, dv, ainfo);
629 }
630 st->devstate[i] = newstate;
631 st->devid[i] = makedev(disc.major, disc.minor);
632 }
633 st->active = array.active_disks;
634 st->working = array.working_disks;
635 st->spare = array.spare_disks;
636 st->failed = array.failed_disks;
637 st->utime = array.utime;
638 st->raid = array.raid_disks;
639 st->err = 0;
640 if ((st->active < st->raid) && st->spare == 0)
641 return 1;
642 return 0;
643 }
644
645 static int add_new_arrays(struct mdstat_ent *mdstat, struct state *statelist,
646 int test, struct alert_info *info)
647 {
648 struct mdstat_ent *mse;
649 int new_found = 0;
650
651 for (mse=mdstat; mse; mse=mse->next)
652 if (mse->devnum != INT_MAX &&
653 (!mse->level || /* retrieve containers */
654 (strcmp(mse->level, "raid0") != 0 &&
655 strcmp(mse->level, "linear") != 0))
656 ) {
657 struct state *st = calloc(1, sizeof *st);
658 mdu_array_info_t array;
659 int fd;
660 if (st == NULL)
661 continue;
662 st->devname = strdup(get_md_name(mse->devnum));
663 if ((fd = open(st->devname, O_RDONLY)) < 0 ||
664 ioctl(fd, GET_ARRAY_INFO, &array)< 0) {
665 /* no such array */
666 if (fd >=0) close(fd);
667 put_md_name(st->devname);
668 free(st->devname);
669 if (st->metadata) {
670 st->metadata->ss->free_super(st->metadata);
671 free(st->metadata);
672 }
673 free(st);
674 continue;
675 }
676 close(fd);
677 st->next = statelist;
678 st->err = 1;
679 st->devnum = mse->devnum;
680 st->percent = -2;
681 st->expected_spares = -1;
682 if (strncmp(mse->metadata_version, "external:", 9) == 0 &&
683 is_subarray(mse->metadata_version+9))
684 st->parent_dev =
685 devname2devnum(mse->metadata_version+10);
686 else
687 st->parent_dev = NoMdDev;
688 statelist = st;
689 if (test)
690 alert("TestMessage", st->devname, NULL, info);
691 alert("NewArray", st->devname, NULL, info);
692 new_found = 1;
693 }
694 return new_found;
695 }
696
697 unsigned long long min_spare_size_required(struct state *st)
698 {
699 int fd;
700 unsigned long long rv = 0;
701
702 if (!st->metadata ||
703 !st->metadata->ss->min_acceptable_spare_size)
704 return rv;
705
706 fd = open(st->devname, O_RDONLY);
707 if (fd < 0)
708 return 0;
709 st->metadata->ss->load_super(st->metadata, fd, st->devname);
710 close(fd);
711 rv = st->metadata->ss->min_acceptable_spare_size(st->metadata);
712 st->metadata->ss->free_super(st->metadata);
713
714 return rv;
715 }
716
717 static int move_spare(struct state *from, struct state *to,
718 dev_t devid,
719 struct alert_info *info)
720 {
721 struct mddev_dev devlist;
722 char devname[20];
723
724 /* try to remove and add */
725 int fd1 = open(to->devname, O_RDONLY);
726 int fd2 = open(from->devname, O_RDONLY);
727
728 if (fd1 < 0 || fd2 < 0) {
729 if (fd1>=0) close(fd1);
730 if (fd2>=0) close(fd2);
731 return 0;
732 }
733
734 devlist.next = NULL;
735 devlist.used = 0;
736 devlist.re_add = 0;
737 devlist.writemostly = 0;
738 devlist.devname = devname;
739 sprintf(devname, "%d:%d", major(devid), minor(devid));
740
741 devlist.disposition = 'r';
742 if (Manage_subdevs(from->devname, fd2, &devlist, -1, 0) == 0) {
743 devlist.disposition = 'a';
744 if (Manage_subdevs(to->devname, fd1, &devlist, -1, 0) == 0) {
745 alert("MoveSpare", to->devname, from->devname, info);
746 close(fd1);
747 close(fd2);
748 return 1;
749 }
750 else Manage_subdevs(from->devname, fd2, &devlist, -1, 0);
751 }
752 close(fd1);
753 close(fd2);
754 return 0;
755 }
756
757 static int check_donor(struct state *from, struct state *to,
758 struct domainlist *domlist)
759 {
760 struct state *sub;
761
762 if (from == to)
763 return 0;
764 if (from->parent)
765 /* Cannot move from a member */
766 return 0;
767 if (from->err)
768 return 0;
769 for (sub = from->subarray; sub; sub = sub->subarray)
770 /* If source array has degraded subarrays, don't
771 * remove anything
772 */
773 if (sub->active < sub->raid)
774 return 0;
775 if (from->metadata->ss->external == 0)
776 if (from->active < from->raid)
777 return 0;
778 if (from->spare <= 0)
779 return 0;
780 if (domlist == NULL)
781 return 0;
782 return 1;
783 }
784
785 static dev_t choose_spare(struct state *from, struct state *to,
786 struct domainlist *domlist)
787 {
788 int d;
789 dev_t dev = 0;
790 unsigned long long min_size
791 = min_spare_size_required(to);
792
793 for (d = from->raid; !dev && d < MaxDisks; d++) {
794 if (from->devid[d] > 0 &&
795 from->devstate[d] == 0) {
796 struct dev_policy *pol;
797 unsigned long long dev_size;
798
799 if (min_size &&
800 dev_size_from_id(from->devid[d], &dev_size) &&
801 dev_size < min_size)
802 continue;
803
804 pol = devnum_policy(from->devid[d]);
805 if (from->spare_group)
806 pol_add(&pol, pol_domain,
807 from->spare_group, NULL);
808 if (domain_test(domlist, pol, to->metadata->ss->name))
809 dev = from->devid[d];
810 dev_policy_free(pol);
811 }
812 }
813 return dev;
814 }
815
816 static dev_t container_choose_spare(struct state *from, struct state *to,
817 struct domainlist *domlist)
818 {
819 /* This is similar to choose_spare, but we cannot trust devstate,
820 * so we need to read the metadata instead
821 */
822
823 struct supertype *st = from->metadata;
824 int fd = open(from->devname, O_RDONLY);
825 int err;
826 struct mdinfo *disks, *d;
827 unsigned long long min_size
828 = min_spare_size_required(to);
829 dev_t dev = 0;
830
831 if (fd < 0)
832 return 0;
833 if (!st->ss->getinfo_super_disks)
834 return 0;
835
836 err = st->ss->load_container(st, fd, NULL);
837 close(fd);
838 if (err)
839 return 0;
840
841 disks = st->ss->getinfo_super_disks(st);
842 st->ss->free_super(st);
843
844 if (!disks)
845 return 0;
846
847 for (d = disks->devs ; d && !dev ; d = d->next) {
848 if (d->disk.state == 0) {
849 struct dev_policy *pol;
850 unsigned long long dev_size;
851 dev = makedev(d->disk.major,d->disk.minor);
852
853 if (min_size &&
854 dev_size_from_id(dev, &dev_size) &&
855 dev_size < min_size) {
856 dev = 0;
857 continue;
858 }
859 pol = devnum_policy(dev);
860 if (from->spare_group)
861 pol_add(&pol, pol_domain,
862 from->spare_group, NULL);
863 if (!domain_test(domlist, pol, to->metadata->ss->name))
864 dev = 0;
865
866 dev_policy_free(pol);
867 }
868 }
869 sysfs_free(disks);
870 return dev;
871 }
872
873
874 static void try_spare_migration(struct state *statelist, struct alert_info *info)
875 {
876 struct state *from;
877 struct state *st;
878
879 link_containers_with_subarrays(statelist);
880 for (st = statelist; st; st = st->next)
881 if (st->active < st->raid &&
882 st->spare == 0 && !st->err) {
883 struct domainlist *domlist = NULL;
884 int d;
885 struct state *to = st;
886
887 if (to->parent)
888 /* member of a container */
889 to = to->parent;
890
891 for (d = 0; d < MaxDisks; d++)
892 if (to->devid[d])
893 domainlist_add_dev(&domlist,
894 to->devid[d],
895 to->metadata->ss->name);
896 if (to->spare_group)
897 domain_add(&domlist, to->spare_group);
898
899 for (from=statelist ; from ; from=from->next) {
900 dev_t devid;
901 if (!check_donor(from, to, domlist))
902 continue;
903 if (from->metadata->ss->external)
904 devid = container_choose_spare(
905 from, to, domlist);
906 else
907 devid = choose_spare(from, to, domlist);
908 if (devid > 0
909 && move_spare(from, to, devid, info))
910 break;
911 }
912 domain_free(domlist);
913 }
914 }
915
916 /* search the statelist to connect external
917 * metadata subarrays with their containers
918 * We always completely rebuild the tree from scratch as
919 * that is safest considering the possibility of entries
920 * disappearing or changing.
921 */
922 static void link_containers_with_subarrays(struct state *list)
923 {
924 struct state *st;
925 struct state *cont;
926 for (st = list; st; st = st->next) {
927 st->parent = NULL;
928 st->subarray = NULL;
929 }
930 for (st = list; st; st = st->next)
931 if (st->parent_dev != NoMdDev)
932 for (cont = list; cont; cont = cont->next)
933 if (!cont->err &&
934 cont->parent_dev == NoMdDev &&
935 cont->devnum == st->parent_dev) {
936 st->parent = cont;
937 st->subarray = cont->subarray;
938 cont->subarray = st;
939 break;
940 }
941 }
942
943 /* Not really Monitor but ... */
944 int Wait(char *dev)
945 {
946 struct stat stb;
947 int devnum;
948 int rv = 1;
949
950 if (stat(dev, &stb) != 0) {
951 fprintf(stderr, Name ": Cannot find %s: %s\n", dev,
952 strerror(errno));
953 return 2;
954 }
955 devnum = stat2devnum(&stb);
956
957 while(1) {
958 struct mdstat_ent *ms = mdstat_read(1, 0);
959 struct mdstat_ent *e;
960
961 for (e=ms ; e; e=e->next)
962 if (e->devnum == devnum)
963 break;
964
965 if (!e || e->percent < 0) {
966 if (e && e->metadata_version &&
967 strncmp(e->metadata_version, "external:", 9) == 0) {
968 if (is_subarray(&e->metadata_version[9]))
969 ping_monitor(&e->metadata_version[9]);
970 else
971 ping_monitor(devnum2devname(devnum));
972 }
973 free_mdstat(ms);
974 return rv;
975 }
976 free_mdstat(ms);
977 rv = 0;
978 mdstat_wait(5);
979 }
980 }