X-Git-Url: http://git.ipfire.org/?p=thirdparty%2Fmdadm.git;a=blobdiff_plain;f=Monitor.c;h=b527165b803acc2eb3a9d84132f44a39f64aeba1;hp=22045280790c41643fcbb2ca224856ff6deee8d6;hb=45c43276d02a32876c7e1f9f0d04580595141b3d;hpb=b98943a4f889b466a3d07264068042b18c620d33 diff --git a/Monitor.c b/Monitor.c index 22045280..b527165b 100644 --- a/Monitor.c +++ b/Monitor.c @@ -125,8 +125,9 @@ int Monitor(struct mddev_dev *devlist, struct state *st2; int finished = 0; struct mdstat_ent *mdstat = NULL; - char *mailfrom = NULL; + char *mailfrom; struct alert_info info; + struct mddev_ident *mdlist; if (!mailaddr) { mailaddr = conf_get_mailaddr(); @@ -138,7 +139,7 @@ int Monitor(struct mddev_dev *devlist, if (!alert_cmd) { alert_cmd = conf_get_program(); - if (alert_cmd && ! c->scan) + if (alert_cmd && !c->scan) pr_err("Monitor using program \"%s\" from config file\n", alert_cmd); } @@ -162,9 +163,10 @@ int Monitor(struct mddev_dev *devlist, return 1; if (devlist == NULL) { - struct mddev_ident *mdlist = conf_get_ident(NULL); - for (; mdlist; mdlist=mdlist->next) { + mdlist = conf_get_ident(NULL); + for (; mdlist; mdlist = mdlist->next) { struct state *st; + if (mdlist->devname == NULL) continue; if (strcasecmp(mdlist->devname, "") == 0) @@ -188,9 +190,10 @@ int Monitor(struct mddev_dev *devlist, } } else { struct mddev_dev *dv; - for (dv=devlist ; dv; dv=dv->next) { - struct mddev_ident *mdlist = conf_get_ident(dv->devname); + + for (dv = devlist; dv; dv = dv->next) { struct state *st = xcalloc(1, sizeof *st); + mdlist = conf_get_ident(dv->devname); st->devname = xstrdup(dv->devname); st->next = statelist; st->devnm[0] = 0; @@ -205,18 +208,18 @@ int Monitor(struct mddev_dev *devlist, } } - while (! finished) { + while (!finished) { int new_found = 0; struct state *st, **stp; int anydegraded = 0; if (mdstat) free_mdstat(mdstat); - mdstat = mdstat_read(oneshot?0:1, 0); + mdstat = mdstat_read(oneshot ? 0 : 1, 0); if (!mdstat) mdstat_close(); - for (st=statelist; st; st=st->next) + for (st = statelist; st; st = st->next) if (check_array(st, mdstat, c->test, &info, increments, c->prefer)) anydegraded = 1; @@ -290,8 +293,8 @@ static int make_daemon(char *pidfile) } close(0); open("/dev/null", O_RDWR); - dup2(0,1); - dup2(0,2); + dup2(0, 1); + dup2(0, 2); setsid(); return -1; } @@ -322,8 +325,7 @@ static int check_one_sharer(int scan) fclose(fp); } if (scan) { - if (mkdir(MDMON_DIR, S_IRWXU) < 0 && - errno != EEXIST) { + if (mkdir(MDMON_DIR, S_IRWXU) < 0 && errno != EEXIST) { pr_err("Can't create autorebuild.pid file\n"); } else { fp = fopen(path, "w"); @@ -346,7 +348,8 @@ static void alert(char *event, char *dev, char *disc, struct alert_info *info) if (!info->alert_cmd && !info->mailaddr && !info->dosyslog) { time_t now = time(0); - printf("%1.15s: %s on %s %s\n", ctime(&now)+4, event, dev, disc?disc:"unknown device"); + printf("%1.15s: %s on %s %s\n", ctime(&now) + 4, + event, dev, disc?disc:"unknown device"); } if (info->alert_cmd) { int pid = fork(); @@ -362,11 +365,10 @@ static void alert(char *event, char *dev, char *disc, struct alert_info *info) exit(2); } } - if (info->mailaddr && - (strncmp(event, "Fail", 4)==0 || - strncmp(event, "Test", 4)==0 || - strncmp(event, "Spares", 6)==0 || - strncmp(event, "Degrade", 7)==0)) { + if (info->mailaddr && (strncmp(event, "Fail", 4) == 0 || + strncmp(event, "Test", 4) == 0 || + strncmp(event, "Spares", 6) == 0 || + strncmp(event, "Degrade", 7) == 0)) { FILE *mp = popen(Sendmail, "w"); if (mp) { FILE *mdstat; @@ -376,7 +378,8 @@ static void alert(char *event, char *dev, char *disc, struct alert_info *info) if (info->mailfrom) fprintf(mp, "From: %s\n", info->mailfrom); else - fprintf(mp, "From: %s monitoring \n", Name); + fprintf(mp, "From: %s monitoring \n", + Name); fprintf(mp, "To: %s\n", info->mailaddr); fprintf(mp, "Subject: %s event on %s:%s\n\n", event, dev, hname); @@ -402,8 +405,9 @@ static void alert(char *event, char *dev, char *disc, struct alert_info *info) int n; fprintf(mp, "\nP.S. The /proc/mdstat file currently contains the following:\n\n"); - while ( (n=fread(buf, 1, sizeof(buf), mdstat)) > 0) - n=fwrite(buf, 1, n, mp); + while ((n = fread(buf, 1, sizeof(buf), + mdstat)) > 0) + n = fwrite(buf, 1, n, mp); fclose(mdstat); } pclose(mp); @@ -415,13 +419,13 @@ static void alert(char *event, char *dev, char *disc, struct alert_info *info) /* Log at a different severity depending on the event. * * These are the critical events: */ - if (strncmp(event, "Fail", 4)==0 || - strncmp(event, "Degrade", 7)==0 || - strncmp(event, "DeviceDisappeared", 17)==0) + if (strncmp(event, "Fail", 4) == 0 || + strncmp(event, "Degrade", 7) == 0 || + strncmp(event, "DeviceDisappeared", 17) == 0) priority = LOG_CRIT; /* Good to know about, but are not failures: */ - else if (strncmp(event, "Rebuild", 7)==0 || - strncmp(event, "MoveSpare", 9)==0 || + else if (strncmp(event, "Rebuild", 7) == 0 || + strncmp(event, "MoveSpare", 9) == 0 || strncmp(event, "Spares", 6) != 0) priority = LOG_WARNING; /* Everything else: */ @@ -461,6 +465,8 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat, int last_disk; int new_array = 0; int retval; + int is_container = 0; + unsigned long redundancy_only_flags = 0; if (test) alert("TestMessage", dev, NULL, ainfo); @@ -471,18 +477,39 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat, if (fd < 0) goto disappeared; - if (!md_array_active(fd)) + if (st->devnm[0] == 0) + strcpy(st->devnm, fd2devnm(fd)); + + for (mse2 = mdstat; mse2; mse2 = mse2->next) + if (strcmp(mse2->devnm, st->devnm) == 0) { + mse2->devnm[0] = 0; /* flag it as "used" */ + mse = mse2; + } + + if (!mse) { + /* duplicated array in statelist + * or re-created after reading mdstat + */ + st->err++; + goto out; + } + + if (mse->level == NULL) + is_container = 1; + + if (!is_container && !md_array_active(fd)) goto disappeared; fcntl(fd, F_SETFD, FD_CLOEXEC); if (md_get_array_info(fd, &array) < 0) goto disappeared; - if (st->devnm[0] == 0) - strcpy(st->devnm, fd2devnm(fd)); + if (!is_container && map_name(pers, mse->level) > 0) + redundancy_only_flags |= GET_MISMATCH; + + sra = sysfs_read(-1, st->devnm, GET_LEVEL | GET_DISKS | GET_DEVS | + GET_STATE | redundancy_only_flags); - sra = sysfs_read(-1, st->devnm, GET_LEVEL | GET_DISKS | GET_DEGRADED | - GET_MISMATCH | GET_DEVS | GET_STATE); if (!sra) goto disappeared; @@ -496,19 +523,6 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat, goto out; } - for (mse2 = mdstat ; mse2 ; mse2=mse2->next) - if (strcmp(mse2->devnm, st->devnm) == 0) { - mse2->devnm[0] = 0; /* flag it as "used" */ - mse = mse2; - } - - if (!mse) { - /* duplicated array in statelist - * or re-created after reading mdstat*/ - st->err++; - close(fd); - goto out; - } /* this array is in /proc/mdstat */ if (array.utime == 0) /* external arrays don't update utime, so @@ -524,9 +538,9 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat, } if (st->utime == array.utime && st->failed == sra->array.failed_disks && - st->working == array.working_disks && + st->working == sra->array.working_disks && st->spare == sra->array.spare_disks && - (mse == NULL || (mse->percent == st->percent))) { + (mse == NULL || (mse->percent == st->percent))) { if ((st->active < st->raid) && st->spare == 0) retval = 1; goto out; @@ -543,7 +557,7 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat, alert("RebuildStarted", dev, NULL, ainfo); if (st->percent >= 0 && mse->percent >= 0 && (mse->percent / increments) > (st->percent / increments)) { - char percentalert[15]; + char percentalert[18]; /* * "RebuildNN" (10 chars) or "RebuildStarted" (15 chars) */ @@ -567,7 +581,7 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat, char cnt[80]; snprintf(cnt, sizeof(cnt), " mismatches found: %d (on raid level %d)", - sra->mismatch_cnt, sra->array.level); + sra->mismatch_cnt, sra->array.level); alert("RebuildFinished", dev, cnt, ainfo); } else alert("RebuildFinished", dev, NULL, ainfo); @@ -593,7 +607,7 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat, strncmp(mse->metadata_version, "external:", 9) == 0 && is_subarray(mse->metadata_version+9)) { char *sl; - strcpy(st->parent_devnm, mse->metadata_version+10); + strcpy(st->parent_devnm, mse->metadata_version + 10); sl = strchr(st->parent_devnm, '/'); if (sl) *sl = 0; @@ -602,9 +616,9 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat, if (st->metadata == NULL && st->parent_devnm[0] == 0) st->metadata = super_by_fd(fd, NULL); - for (i=0; idevid[i] = makedev(disc.major, disc.minor); } st->active = sra->array.active_disks; - st->working = array.working_disks; + st->working = sra->array.working_disks; st->spare = sra->array.spare_disks; st->failed = sra->array.failed_disks; st->utime = array.utime; @@ -649,7 +663,7 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat, out: if (sra) sysfs_free(sra); - if (fd > 0) + if (fd >= 0) close(fd); return retval; @@ -667,12 +681,10 @@ static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist, int new_found = 0; char *name; - for (mse=mdstat; mse; mse=mse->next) - if (mse->devnm[0] && - (!mse->level || /* retrieve containers */ - (strcmp(mse->level, "raid0") != 0 && - strcmp(mse->level, "linear") != 0)) - ) { + for (mse = mdstat; mse; mse = mse->next) + if (mse->devnm[0] && (!mse->level || /* retrieve containers */ + (strcmp(mse->level, "raid0") != 0 && + strcmp(mse->level, "linear") != 0))) { struct state *st = xcalloc(1, sizeof *st); mdu_array_info_t array; int fd; @@ -706,7 +718,8 @@ static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist, st->percent = RESYNC_UNKNOWN; st->expected_spares = -1; if (mse->metadata_version && - strncmp(mse->metadata_version, "external:", 9) == 0 && + strncmp(mse->metadata_version, + "external:", 9) == 0 && is_subarray(mse->metadata_version+9)) { char *sl; strcpy(st->parent_devnm, @@ -728,8 +741,7 @@ static int get_required_spare_criteria(struct state *st, { int fd; - if (!st->metadata || - !st->metadata->ss->get_spare_criteria) { + if (!st->metadata || !st->metadata->ss->get_spare_criteria) { sc->min_size = 0; sc->sector_size = 0; return 0; @@ -778,14 +790,13 @@ static int check_donor(struct state *from, struct state *to) } static dev_t choose_spare(struct state *from, struct state *to, - struct domainlist *domlist, struct spare_criteria *sc) + struct domainlist *domlist, struct spare_criteria *sc) { int d; dev_t dev = 0; for (d = from->raid; !dev && d < MAX_DISKS; d++) { - if (from->devid[d] > 0 && - from->devstate[d] == 0) { + if (from->devid[d] > 0 && from->devstate[d] == 0) { struct dev_policy *pol; unsigned long long dev_size; unsigned int dev_sector_size; @@ -809,7 +820,8 @@ static dev_t choose_spare(struct state *from, struct state *to, if (from->spare_group) pol_add(&pol, pol_domain, from->spare_group, NULL); - if (domain_test(domlist, pol, to->metadata->ss->name) == 1) + if (domain_test(domlist, pol, + to->metadata->ss->name) == 1) dev = from->devid[d]; dev_policy_free(pol); } @@ -856,8 +868,8 @@ static dev_t container_choose_spare(struct state *from, struct state *to, } dp = list->devs; while (dp) { - if (dp->disk.state & (1<disk.state & (1<disk.state & (1 << MD_DISK_SYNC) && + !(dp->disk.state & (1 << MD_DISK_FAULTY))) active_cnt++; dp = dp->next; } @@ -890,8 +902,7 @@ static void try_spare_migration(struct state *statelist, struct alert_info *info link_containers_with_subarrays(statelist); for (st = statelist; st; st = st->next) - if (st->active < st->raid && - st->spare == 0 && !st->err) { + if (st->active < st->raid && st->spare == 0 && !st->err) { struct domainlist *domlist = NULL; int d; struct state *to = st; @@ -939,9 +950,11 @@ static void try_spare_migration(struct state *statelist, struct alert_info *info else devid = choose_spare(from, to, domlist, &sc); - if (devid > 0 - && move_spare(from->devname, to->devname, devid)) { - alert("MoveSpare", to->devname, from->devname, info); + if (devid > 0 && + move_spare(from->devname, to->devname, + devid)) { + alert("MoveSpare", to->devname, + from->devname, info); break; } } @@ -966,8 +979,7 @@ static void link_containers_with_subarrays(struct state *list) for (st = list; st; st = st->next) if (st->parent_devnm[0]) for (cont = list; cont; cont = cont->next) - if (!cont->err && - cont->parent_devnm[0] == 0 && + if (!cont->err && cont->parent_devnm[0] == 0 && strcmp(cont->devnm, st->parent_devnm) == 0) { st->parent = cont; st->subarray = cont->subarray; @@ -980,18 +992,27 @@ static void link_containers_with_subarrays(struct state *list) int Wait(char *dev) { char devnm[32]; + dev_t rdev; + char *tmp; int rv = 1; int frozen_remaining = 3; - if (!stat_is_blkdev(dev, NULL)) + if (!stat_is_blkdev(dev, &rdev)) return 2; - strcpy(devnm, dev); + + tmp = devid2devnm(rdev); + if (!tmp) { + pr_err("Cannot get md device name.\n"); + return 2; + } + + strcpy(devnm, tmp); while(1) { struct mdstat_ent *ms = mdstat_read(1, 0); struct mdstat_ent *e; - for (e=ms ; e; e=e->next) + for (e = ms; e; e = e->next) if (strcmp(e->devnm, devnm) == 0) break; @@ -1034,10 +1055,13 @@ int Wait(char *dev) } } +/* The state "broken" is used only for RAID0/LINEAR - it's the same as + * "clean", but used in case the array has one or more members missing. + */ static char *clean_states[] = { - "clear", "inactive", "readonly", "read-auto", "clean", NULL }; + "clear", "inactive", "readonly", "read-auto", "clean", "broken", NULL }; -int WaitClean(char *dev, int sock, int verbose) +int WaitClean(char *dev, int verbose) { int fd; struct mdinfo *mdi; @@ -1095,7 +1119,8 @@ int WaitClean(char *dev, int sock, int verbose) rv = read(state_fd, buf, sizeof(buf)); if (rv < 0) break; - if (sysfs_match_word(buf, clean_states) <= 4) + if (sysfs_match_word(buf, clean_states) < + (int)ARRAY_SIZE(clean_states) - 1) break; rv = sysfs_wait(state_fd, &delay); if (rv < 0 && errno != EINTR) @@ -1104,18 +1129,18 @@ int WaitClean(char *dev, int sock, int verbose) } if (rv < 0) rv = 1; - else if (fping_monitor(sock) == 0 || - ping_monitor(mdi->text_version) == 0) { + else if (ping_monitor(mdi->text_version) == 0) { /* we need to ping to close the window between array * state transitioning to clean and the metadata being * marked clean */ rv = 0; - } else + } else { rv = 1; + pr_err("Error connecting monitor with %s\n", dev); + } if (rv && verbose) - pr_err("Error waiting for %s to be clean\n", - dev); + pr_err("Error waiting for %s to be clean\n", dev); /* restore the original safe_mode_delay */ sysfs_set_safemode(mdi, mdi->safe_mode_delay);