X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=Monitor.c;h=da1003b4bfaf520fae8cf98977e95224bd98b6c4;hb=49b69533e8a62573de987c45cb4469fc8e754723;hp=83a6d10af31b3c655086ddb009f945a3c9e71085;hpb=72362f18aee5adedb405fe61c324604184d74555;p=thirdparty%2Fmdadm.git diff --git a/Monitor.c b/Monitor.c index 83a6d10a..da1003b4 100644 --- a/Monitor.c +++ b/Monitor.c @@ -29,6 +29,7 @@ #include #include #include +#include struct state { char *devname; @@ -63,6 +64,7 @@ struct alert_info { }; static int make_daemon(char *pidfile); static int check_one_sharer(int scan); +static void write_autorebuild_pid(void); static void alert(char *event, char *dev, char *disc, struct alert_info *info); static int check_array(struct state *st, struct mdstat_ent *mdstat, int test, struct alert_info *info, @@ -71,6 +73,7 @@ static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist, int test, struct alert_info *info); static void try_spare_migration(struct state *statelist, struct alert_info *info); static void link_containers_with_subarrays(struct state *list); +static int check_udev_activity(void); int Monitor(struct mddev_dev *devlist, char *mailaddr, char *alert_cmd, @@ -139,7 +142,7 @@ int Monitor(struct mddev_dev *devlist, if (!alert_cmd) { alert_cmd = conf_get_program(); - if (alert_cmd && ! c->scan) + if (alert_cmd && !c->scan) pr_err("Monitor using program \"%s\" from config file\n", alert_cmd); } @@ -152,6 +155,11 @@ int Monitor(struct mddev_dev *devlist, info.mailfrom = mailfrom; info.dosyslog = dosyslog; + if (share){ + if (check_one_sharer(c->scan)) + return 1; + } + if (daemonise) { int rv = make_daemon(pidfile); if (rv >= 0) @@ -159,13 +167,13 @@ int Monitor(struct mddev_dev *devlist, } if (share) - if (check_one_sharer(c->scan)) - return 1; + write_autorebuild_pid(); if (devlist == NULL) { mdlist = conf_get_ident(NULL); - for (; mdlist; mdlist=mdlist->next) { + for (; mdlist; mdlist = mdlist->next) { struct state *st; + if (mdlist->devname == NULL) continue; if (strcasecmp(mdlist->devname, "") == 0) @@ -189,7 +197,8 @@ int Monitor(struct mddev_dev *devlist, } } else { struct mddev_dev *dv; - for (dv=devlist ; dv; dv=dv->next) { + + for (dv = devlist; dv; dv = dv->next) { struct state *st = xcalloc(1, sizeof *st); mdlist = conf_get_ident(dv->devname); st->devname = xstrdup(dv->devname); @@ -206,21 +215,28 @@ int Monitor(struct mddev_dev *devlist, } } - while (! finished) { + while (!finished) { int new_found = 0; struct state *st, **stp; int anydegraded = 0; + int anyredundant = 0; if (mdstat) free_mdstat(mdstat); - mdstat = mdstat_read(oneshot?0:1, 0); - if (!mdstat) - mdstat_close(); + mdstat = mdstat_read(oneshot ? 0 : 1, 0); - for (st=statelist; st; st=st->next) + for (st = statelist; st; st = st->next) { if (check_array(st, mdstat, c->test, &info, increments, c->prefer)) anydegraded = 1; + /* for external arrays, metadata is filled for + * containers only + */ + if (st->metadata && st->metadata->ss->external) + continue; + if (st->err == 0 && !anyredundant) + anyredundant = 1; + } /* now check if there are any new devices found in mdstat */ if (c->scan) @@ -229,15 +245,26 @@ int Monitor(struct mddev_dev *devlist, /* If an array has active < raid && spare == 0 && spare_group != NULL * Look for another array with spare > 0 and active == raid and same spare_group - * if found, choose a device and hotremove/hotadd + * if found, choose a device and hotremove/hotadd */ if (share && anydegraded) try_spare_migration(statelist, &info); if (!new_found) { if (oneshot) break; - else - mdstat_wait(c->delay); + else if (!anyredundant) { + break; + } + else { + /* + * If mdmonitor is awaken by event, check for udev activity + * to wait for udev to finish new devices processing. + */ + if (mdstat_wait(c->delay) && check_udev_activity()) + pr_err("Error while waiting for UDEV to complete new devices processing\n"); + + mdstat_close(); + } } c->test = 0; @@ -274,8 +301,11 @@ static int make_daemon(char *pidfile) if (!pidfile) printf("%d\n", pid); else { - FILE *pid_file; - pid_file=fopen(pidfile, "w"); + FILE *pid_file = NULL; + int fd = open(pidfile, O_WRONLY | O_CREAT | O_TRUNC, + 0644); + if (fd >= 0) + pid_file = fdopen(fd, "w"); if (!pid_file) perror("cannot create pid file"); else { @@ -289,55 +319,70 @@ static int make_daemon(char *pidfile) perror("daemonise"); return 1; } - close(0); - open("/dev/null", O_RDWR); - dup2(0,1); - dup2(0,2); + manage_fork_fds(0); setsid(); return -1; } static int check_one_sharer(int scan) { - int pid, rv; + int pid; + FILE *comm_fp; FILE *fp; - char dir[20]; - char path[100]; - struct stat buf; + char comm_path[PATH_MAX]; + char path[PATH_MAX]; + char comm[20]; + sprintf(path, "%s/autorebuild.pid", MDMON_DIR); fp = fopen(path, "r"); if (fp) { if (fscanf(fp, "%d", &pid) != 1) pid = -1; - sprintf(dir, "/proc/%d", pid); - rv = stat(dir, &buf); - if (rv != -1) { - if (scan) { - pr_err("Only one autorebuild process allowed in scan mode, aborting\n"); - fclose(fp); - return 1; - } else { - pr_err("Warning: One autorebuild process already running.\n"); + snprintf(comm_path, sizeof(comm_path), + "/proc/%d/comm", pid); + comm_fp = fopen(comm_path, "r"); + if (comm_fp) { + if (fscanf(comm_fp, "%s", comm) && + strncmp(basename(comm), Name, strlen(Name)) == 0) { + if (scan) { + pr_err("Only one autorebuild process allowed in scan mode, aborting\n"); + fclose(comm_fp); + fclose(fp); + return 1; + } else { + pr_err("Warning: One autorebuild process already running.\n"); + } } + fclose(comm_fp); } fclose(fp); } - if (scan) { - if (mkdir(MDMON_DIR, S_IRWXU) < 0 && - errno != EEXIST) { + return 0; +} + +static void write_autorebuild_pid() +{ + char path[PATH_MAX]; + int pid; + FILE *fp = NULL; + sprintf(path, "%s/autorebuild.pid", MDMON_DIR); + + if (mkdir(MDMON_DIR, 0700) < 0 && errno != EEXIST) { + pr_err("Can't create autorebuild.pid file\n"); + } else { + int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0700); + + if (fd >= 0) + fp = fdopen(fd, "w"); + + if (!fp) pr_err("Can't create autorebuild.pid file\n"); - } else { - fp = fopen(path, "w"); - if (!fp) - pr_err("Cannot create autorebuild.pidfile\n"); - else { - pid = getpid(); - fprintf(fp, "%d\n", pid); - fclose(fp); - } + else { + pid = getpid(); + fprintf(fp, "%d\n", pid); + fclose(fp); } } - return 0; } static void alert(char *event, char *dev, char *disc, struct alert_info *info) @@ -347,7 +392,8 @@ static void alert(char *event, char *dev, char *disc, struct alert_info *info) if (!info->alert_cmd && !info->mailaddr && !info->dosyslog) { time_t now = time(0); - printf("%1.15s: %s on %s %s\n", ctime(&now)+4, event, dev, disc?disc:"unknown device"); + printf("%1.15s: %s on %s %s\n", ctime(&now) + 4, + event, dev, disc?disc:"unknown device"); } if (info->alert_cmd) { int pid = fork(); @@ -363,11 +409,10 @@ static void alert(char *event, char *dev, char *disc, struct alert_info *info) exit(2); } } - if (info->mailaddr && - (strncmp(event, "Fail", 4)==0 || - strncmp(event, "Test", 4)==0 || - strncmp(event, "Spares", 6)==0 || - strncmp(event, "Degrade", 7)==0)) { + if (info->mailaddr && (strncmp(event, "Fail", 4) == 0 || + strncmp(event, "Test", 4) == 0 || + strncmp(event, "Spares", 6) == 0 || + strncmp(event, "Degrade", 7) == 0)) { FILE *mp = popen(Sendmail, "w"); if (mp) { FILE *mdstat; @@ -377,7 +422,8 @@ static void alert(char *event, char *dev, char *disc, struct alert_info *info) if (info->mailfrom) fprintf(mp, "From: %s\n", info->mailfrom); else - fprintf(mp, "From: %s monitoring \n", Name); + fprintf(mp, "From: %s monitoring \n", + Name); fprintf(mp, "To: %s\n", info->mailaddr); fprintf(mp, "Subject: %s event on %s:%s\n\n", event, dev, hname); @@ -403,8 +449,9 @@ static void alert(char *event, char *dev, char *disc, struct alert_info *info) int n; fprintf(mp, "\nP.S. The /proc/mdstat file currently contains the following:\n\n"); - while ( (n=fread(buf, 1, sizeof(buf), mdstat)) > 0) - n=fwrite(buf, 1, n, mp); + while ((n = fread(buf, 1, sizeof(buf), + mdstat)) > 0) + n = fwrite(buf, 1, n, mp); fclose(mdstat); } pclose(mp); @@ -416,13 +463,13 @@ static void alert(char *event, char *dev, char *disc, struct alert_info *info) /* Log at a different severity depending on the event. * * These are the critical events: */ - if (strncmp(event, "Fail", 4)==0 || - strncmp(event, "Degrade", 7)==0 || - strncmp(event, "DeviceDisappeared", 17)==0) + if (strncmp(event, "Fail", 4) == 0 || + strncmp(event, "Degrade", 7) == 0 || + strncmp(event, "DeviceDisappeared", 17) == 0) priority = LOG_CRIT; /* Good to know about, but are not failures: */ - else if (strncmp(event, "Rebuild", 7)==0 || - strncmp(event, "MoveSpare", 9)==0 || + else if (strncmp(event, "Rebuild", 7) == 0 || + strncmp(event, "MoveSpare", 9) == 0 || strncmp(event, "Spares", 6) != 0) priority = LOG_WARNING; /* Everything else: */ @@ -462,6 +509,8 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat, int last_disk; int new_array = 0; int retval; + int is_container = 0; + unsigned long redundancy_only_flags = 0; if (test) alert("TestMessage", dev, NULL, ainfo); @@ -472,18 +521,39 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat, if (fd < 0) goto disappeared; - if (!md_array_active(fd)) + if (st->devnm[0] == 0) + strcpy(st->devnm, fd2devnm(fd)); + + for (mse2 = mdstat; mse2; mse2 = mse2->next) + if (strcmp(mse2->devnm, st->devnm) == 0) { + mse2->devnm[0] = 0; /* flag it as "used" */ + mse = mse2; + } + + if (!mse) { + /* duplicated array in statelist + * or re-created after reading mdstat + */ + st->err++; + goto out; + } + + if (mse->level == NULL) + is_container = 1; + + if (!is_container && !md_array_active(fd)) goto disappeared; fcntl(fd, F_SETFD, FD_CLOEXEC); if (md_get_array_info(fd, &array) < 0) goto disappeared; - if (st->devnm[0] == 0) - strcpy(st->devnm, fd2devnm(fd)); + if (!is_container && map_name(pers, mse->level) > 0) + redundancy_only_flags |= GET_MISMATCH; + + sra = sysfs_read(-1, st->devnm, GET_LEVEL | GET_DISKS | GET_DEVS | + GET_STATE | redundancy_only_flags); - sra = sysfs_read(-1, st->devnm, GET_LEVEL | GET_DISKS | GET_DEGRADED | - GET_MISMATCH | GET_DEVS | GET_STATE); if (!sra) goto disappeared; @@ -497,19 +567,6 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat, goto out; } - for (mse2 = mdstat ; mse2 ; mse2=mse2->next) - if (strcmp(mse2->devnm, st->devnm) == 0) { - mse2->devnm[0] = 0; /* flag it as "used" */ - mse = mse2; - } - - if (!mse) { - /* duplicated array in statelist - * or re-created after reading mdstat*/ - st->err++; - close(fd); - goto out; - } /* this array is in /proc/mdstat */ if (array.utime == 0) /* external arrays don't update utime, so @@ -521,13 +578,14 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat, st->err = 0; st->percent = RESYNC_NONE; new_array = 1; - alert("NewArray", st->devname, NULL, ainfo); + if (!is_container) + alert("NewArray", st->devname, NULL, ainfo); } if (st->utime == array.utime && st->failed == sra->array.failed_disks && st->working == sra->array.working_disks && st->spare == sra->array.spare_disks && - (mse == NULL || (mse->percent == st->percent))) { + (mse == NULL || (mse->percent == st->percent))) { if ((st->active < st->raid) && st->spare == 0) retval = 1; goto out; @@ -544,7 +602,7 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat, alert("RebuildStarted", dev, NULL, ainfo); if (st->percent >= 0 && mse->percent >= 0 && (mse->percent / increments) > (st->percent / increments)) { - char percentalert[15]; + char percentalert[18]; /* * "RebuildNN" (10 chars) or "RebuildStarted" (15 chars) */ @@ -568,7 +626,7 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat, char cnt[80]; snprintf(cnt, sizeof(cnt), " mismatches found: %d (on raid level %d)", - sra->mismatch_cnt, sra->array.level); + sra->mismatch_cnt, sra->array.level); alert("RebuildFinished", dev, cnt, ainfo); } else alert("RebuildFinished", dev, NULL, ainfo); @@ -594,7 +652,7 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat, strncmp(mse->metadata_version, "external:", 9) == 0 && is_subarray(mse->metadata_version+9)) { char *sl; - strcpy(st->parent_devnm, mse->metadata_version+10); + strcpy(st->parent_devnm, mse->metadata_version + 10); sl = strchr(st->parent_devnm, '/'); if (sl) *sl = 0; @@ -603,9 +661,9 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat, if (st->metadata == NULL && st->parent_devnm[0] == 0) st->metadata = super_by_fd(fd, NULL); - for (i=0; i 0) + if (fd >= 0) close(fd); return retval; disappeared: - if (!st->err) + if (!st->err && !is_container) alert("DeviceDisappeared", dev, NULL, ainfo); st->err++; goto out; @@ -668,12 +726,10 @@ static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist, int new_found = 0; char *name; - for (mse=mdstat; mse; mse=mse->next) - if (mse->devnm[0] && - (!mse->level || /* retrieve containers */ - (strcmp(mse->level, "raid0") != 0 && - strcmp(mse->level, "linear") != 0)) - ) { + for (mse = mdstat; mse; mse = mse->next) + if (mse->devnm[0] && (!mse->level || /* retrieve containers */ + (strcmp(mse->level, "raid0") != 0 && + strcmp(mse->level, "linear") != 0))) { struct state *st = xcalloc(1, sizeof *st); mdu_array_info_t array; int fd; @@ -707,7 +763,8 @@ static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist, st->percent = RESYNC_UNKNOWN; st->expected_spares = -1; if (mse->metadata_version && - strncmp(mse->metadata_version, "external:", 9) == 0 && + strncmp(mse->metadata_version, + "external:", 9) == 0 && is_subarray(mse->metadata_version+9)) { char *sl; strcpy(st->parent_devnm, @@ -729,8 +786,7 @@ static int get_required_spare_criteria(struct state *st, { int fd; - if (!st->metadata || - !st->metadata->ss->get_spare_criteria) { + if (!st->metadata || !st->metadata->ss->get_spare_criteria) { sc->min_size = 0; sc->sector_size = 0; return 0; @@ -779,14 +835,13 @@ static int check_donor(struct state *from, struct state *to) } static dev_t choose_spare(struct state *from, struct state *to, - struct domainlist *domlist, struct spare_criteria *sc) + struct domainlist *domlist, struct spare_criteria *sc) { int d; dev_t dev = 0; for (d = from->raid; !dev && d < MAX_DISKS; d++) { - if (from->devid[d] > 0 && - from->devstate[d] == 0) { + if (from->devid[d] > 0 && from->devstate[d] == 0) { struct dev_policy *pol; unsigned long long dev_size; unsigned int dev_sector_size; @@ -810,7 +865,8 @@ static dev_t choose_spare(struct state *from, struct state *to, if (from->spare_group) pol_add(&pol, pol_domain, from->spare_group, NULL); - if (domain_test(domlist, pol, to->metadata->ss->name) == 1) + if (domain_test(domlist, pol, + to->metadata->ss->name) == 1) dev = from->devid[d]; dev_policy_free(pol); } @@ -857,8 +913,8 @@ static dev_t container_choose_spare(struct state *from, struct state *to, } dp = list->devs; while (dp) { - if (dp->disk.state & (1<disk.state & (1<disk.state & (1 << MD_DISK_SYNC) && + !(dp->disk.state & (1 << MD_DISK_FAULTY))) active_cnt++; dp = dp->next; } @@ -891,8 +947,7 @@ static void try_spare_migration(struct state *statelist, struct alert_info *info link_containers_with_subarrays(statelist); for (st = statelist; st; st = st->next) - if (st->active < st->raid && - st->spare == 0 && !st->err) { + if (st->active < st->raid && st->spare == 0 && !st->err) { struct domainlist *domlist = NULL; int d; struct state *to = st; @@ -940,9 +995,11 @@ static void try_spare_migration(struct state *statelist, struct alert_info *info else devid = choose_spare(from, to, domlist, &sc); - if (devid > 0 - && move_spare(from->devname, to->devname, devid)) { - alert("MoveSpare", to->devname, from->devname, info); + if (devid > 0 && + move_spare(from->devname, to->devname, + devid)) { + alert("MoveSpare", to->devname, + from->devname, info); break; } } @@ -967,8 +1024,7 @@ static void link_containers_with_subarrays(struct state *list) for (st = list; st; st = st->next) if (st->parent_devnm[0]) for (cont = list; cont; cont = cont->next) - if (!cont->err && - cont->parent_devnm[0] == 0 && + if (!cont->err && cont->parent_devnm[0] == 0 && strcmp(cont->devnm, st->parent_devnm) == 0) { st->parent = cont; st->subarray = cont->subarray; @@ -977,22 +1033,88 @@ static void link_containers_with_subarrays(struct state *list) } } + +/* function: check_udev_activity + * Description: Function waits for udev to finish + * events processing. + * Returns: + * 1 - detected error while opening udev + * 2 - timeout + * 0 - successfull completion + */ +static int check_udev_activity(void) +{ + struct udev *udev = NULL; + struct udev_queue *udev_queue = NULL; + int timeout_cnt = 30; + int rc = 0; + + /* + * In rare cases systemd may not have udevm, + * in such cases just exit with rc 0 + */ + if (!use_udev()) + goto out; + + udev = udev_new(); + if (!udev) { + rc = 1; + goto out; + } + + udev_queue = udev_queue_new(udev); + if (!udev_queue) { + rc = 1; + goto out; + } + + if (udev_queue_get_queue_is_empty(udev_queue)) + goto out; + + while (!udev_queue_get_queue_is_empty(udev_queue)) { + sleep(1); + + if (timeout_cnt) + timeout_cnt--; + else { + rc = 2; + goto out; + } + } + +out: + if (udev_queue) + udev_queue_unref(udev_queue); + if (udev) + udev_unref(udev); + return rc; +} + /* Not really Monitor but ... */ int Wait(char *dev) { char devnm[32]; + dev_t rdev; + char *tmp; int rv = 1; int frozen_remaining = 3; - if (!stat_is_blkdev(dev, NULL)) + if (!stat_is_blkdev(dev, &rdev)) return 2; - strcpy(devnm, dev); + + tmp = devid2devnm(rdev); + if (!tmp) { + pr_err("Cannot get md device name.\n"); + return 2; + } + + strcpy(devnm, tmp); while(1) { struct mdstat_ent *ms = mdstat_read(1, 0); struct mdstat_ent *e; - for (e=ms ; e; e=e->next) + for (e = ms; e; e = e->next) if (strcmp(e->devnm, devnm) == 0) break; @@ -1035,10 +1157,13 @@ int Wait(char *dev) } } +/* The state "broken" is used only for RAID0/LINEAR - it's the same as + * "clean", but used in case the array has one or more members missing. + */ static char *clean_states[] = { - "clear", "inactive", "readonly", "read-auto", "clean", NULL }; + "clear", "inactive", "readonly", "read-auto", "clean", "broken", NULL }; -int WaitClean(char *dev, int sock, int verbose) +int WaitClean(char *dev, int verbose) { int fd; struct mdinfo *mdi; @@ -1096,7 +1221,8 @@ int WaitClean(char *dev, int sock, int verbose) rv = read(state_fd, buf, sizeof(buf)); if (rv < 0) break; - if (sysfs_match_word(buf, clean_states) <= 4) + if (sysfs_match_word(buf, clean_states) < + (int)ARRAY_SIZE(clean_states) - 1) break; rv = sysfs_wait(state_fd, &delay); if (rv < 0 && errno != EINTR) @@ -1105,18 +1231,18 @@ int WaitClean(char *dev, int sock, int verbose) } if (rv < 0) rv = 1; - else if (fping_monitor(sock) == 0 || - ping_monitor(mdi->text_version) == 0) { + else if (ping_monitor(mdi->text_version) == 0) { /* we need to ping to close the window between array * state transitioning to clean and the metadata being * marked clean */ rv = 0; - } else + } else { rv = 1; + pr_err("Error connecting monitor with %s\n", dev); + } if (rv && verbose) - pr_err("Error waiting for %s to be clean\n", - dev); + pr_err("Error waiting for %s to be clean\n", dev); /* restore the original safe_mode_delay */ sysfs_set_safemode(mdi, mdi->safe_mode_delay);