};
static int make_daemon(char *pidfile);
static int check_one_sharer(int scan);
+static void write_autorebuild_pid(void);
static void alert(char *event, char *dev, char *disc, struct alert_info *info);
static int check_array(struct state *st, struct mdstat_ent *mdstat,
int test, struct alert_info *info,
struct state *st2;
int finished = 0;
struct mdstat_ent *mdstat = NULL;
- char *mailfrom = NULL;
+ char *mailfrom;
struct alert_info info;
+ struct mddev_ident *mdlist;
+ int delay_for_event = c->delay;
if (!mailaddr) {
mailaddr = conf_get_mailaddr();
if (!alert_cmd) {
alert_cmd = conf_get_program();
- if (alert_cmd && ! c->scan)
+ if (alert_cmd && !c->scan)
pr_err("Monitor using program \"%s\" from config file\n",
alert_cmd);
}
info.mailfrom = mailfrom;
info.dosyslog = dosyslog;
+ if (share){
+ if (check_one_sharer(c->scan))
+ return 1;
+ }
+
if (daemonise) {
int rv = make_daemon(pidfile);
if (rv >= 0)
}
if (share)
- if (check_one_sharer(c->scan))
- return 1;
+ write_autorebuild_pid();
if (devlist == NULL) {
- struct mddev_ident *mdlist = conf_get_ident(NULL);
- for (; mdlist; mdlist=mdlist->next) {
+ mdlist = conf_get_ident(NULL);
+ for (; mdlist; mdlist = mdlist->next) {
struct state *st;
+
if (mdlist->devname == NULL)
continue;
if (strcasecmp(mdlist->devname, "<ignore>") == 0)
}
} else {
struct mddev_dev *dv;
- for (dv=devlist ; dv; dv=dv->next) {
- struct mddev_ident *mdlist = conf_get_ident(dv->devname);
+
+ for (dv = devlist; dv; dv = dv->next) {
struct state *st = xcalloc(1, sizeof *st);
+ mdlist = conf_get_ident(dv->devname);
st->devname = xstrdup(dv->devname);
st->next = statelist;
st->devnm[0] = 0;
}
}
- while (! finished) {
+ while (!finished) {
int new_found = 0;
struct state *st, **stp;
int anydegraded = 0;
+ int anyredundant = 0;
if (mdstat)
free_mdstat(mdstat);
- mdstat = mdstat_read(oneshot?0:1, 0);
- if (!mdstat)
- mdstat_close();
+ mdstat = mdstat_read(oneshot ? 0 : 1, 0);
- for (st=statelist; st; st=st->next)
+ for (st = statelist; st; st = st->next) {
if (check_array(st, mdstat, c->test, &info,
increments, c->prefer))
anydegraded = 1;
+ /* for external arrays, metadata is filled for
+ * containers only
+ */
+ if (st->metadata && st->metadata->ss->external)
+ continue;
+ if (st->err == 0 && !anyredundant)
+ anyredundant = 1;
+ }
/* now check if there are any new devices found in mdstat */
if (c->scan)
if (!new_found) {
if (oneshot)
break;
- else
- mdstat_wait(c->delay);
+ else if (!anyredundant) {
+ break;
+ }
+ else {
+ int wait_result = mdstat_wait(delay_for_event);
+
+ /*
+ * If mdmonitor is awakened by an event, set a small delay once
+ * to deal with udev and mdadm.
+ */
+ if (wait_result != 0) {
+ if (c->delay > 5)
+ delay_for_event = 5;
+ } else
+ delay_for_event = c->delay;
+
+ mdstat_close();
+ }
}
c->test = 0;
if (!pidfile)
printf("%d\n", pid);
else {
- FILE *pid_file;
- pid_file=fopen(pidfile, "w");
+ FILE *pid_file = NULL;
+ int fd = open(pidfile, O_WRONLY | O_CREAT | O_TRUNC,
+ 0644);
+ if (fd >= 0)
+ pid_file = fdopen(fd, "w");
if (!pid_file)
perror("cannot create pid file");
else {
perror("daemonise");
return 1;
}
- close(0);
- open("/dev/null", O_RDWR);
- dup2(0,1);
- dup2(0,2);
+ manage_fork_fds(0);
setsid();
return -1;
}
+/*
+ * Check whether another autorebuild monitor is already running.
+ *
+ * Reads a pid from MDMON_DIR/autorebuild.pid and compares the first word of
+ * /proc/<pid>/comm against Name (prefix match on strlen(Name) characters).
+ *
+ * Returns 1 if a matching process exists and @scan is set; otherwise
+ * returns 0 (a warning is printed when a match exists and @scan is unset).
+ */
 static int check_one_sharer(int scan)
 {
-	int pid, rv;
+	int pid;
+	FILE *comm_fp;
 	FILE *fp;
-	char dir[20];
-	char path[100];
-	struct stat buf;
+	char comm_path[PATH_MAX];
+	char path[PATH_MAX];
+	char comm[20];
+
 	sprintf(path, "%s/autorebuild.pid", MDMON_DIR);
 	fp = fopen(path, "r");
 	if (fp) {
 		if (fscanf(fp, "%d", &pid) != 1)
 			pid = -1;
-		sprintf(dir, "/proc/%d", pid);
-		rv = stat(dir, &buf);
-		if (rv != -1) {
-			if (scan) {
-				pr_err("Only one autorebuild process allowed in scan mode, aborting\n");
-				fclose(fp);
-				return 1;
-			} else {
-				pr_err("Warning: One autorebuild process already running.\n");
+		snprintf(comm_path, sizeof(comm_path),
+			 "/proc/%d/comm", pid);
+		comm_fp = fopen(comm_path, "r");
+		if (comm_fp) {
+			/*
+			 * Bound the read to sizeof(comm) - 1 and require one
+			 * successful conversion: fscanf() returns EOF (which
+			 * is non-zero) on failure, and comm would then be
+			 * read uninitialised by strncmp().
+			 */
+			if (fscanf(comm_fp, "%19s", comm) == 1 &&
+			    strncmp(basename(comm), Name, strlen(Name)) == 0) {
+				if (scan) {
+					pr_err("Only one autorebuild process allowed in scan mode, aborting\n");
+					fclose(comm_fp);
+					fclose(fp);
+					return 1;
+				} else {
+					pr_err("Warning: One autorebuild process already running.\n");
+				}
 			}
+			fclose(comm_fp);
 		}
 		fclose(fp);
 	}
-	if (scan) {
-		if (mkdir(MDMON_DIR, S_IRWXU) < 0 &&
-		    errno != EEXIST) {
+	return 0;
+}
+
+/*
+ * Record this process's pid in MDMON_DIR/autorebuild.pid.
+ *
+ * MDMON_DIR is created with mode 0700 if it does not exist yet.  Any
+ * failure is reported with pr_err() and otherwise ignored; nothing is
+ * returned to the caller.
+ */
+static void write_autorebuild_pid(void)
+{
+	char path[PATH_MAX];
+	int pid;
+	FILE *fp = NULL;
+
+	snprintf(path, sizeof(path), "%s/autorebuild.pid", MDMON_DIR);
+
+	if (mkdir(MDMON_DIR, 0700) < 0 && errno != EEXIST) {
+		pr_err("Can't create autorebuild.pid file\n");
+	} else {
+		/* Owner read/write only: a pid file has no reason to be executable. */
+		int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0600);
+
+		if (fd >= 0)
+			fp = fdopen(fd, "w");
+
+		if (!fp) {
 			pr_err("Can't create autorebuild.pid file\n");
-		} else {
-			fp = fopen(path, "w");
-			if (!fp)
-				pr_err("Cannot create autorebuild.pidfile\n");
-			else {
-				pid = getpid();
-				fprintf(fp, "%d\n", pid);
-				fclose(fp);
-			}
+			/* fdopen() failed after open() succeeded: don't leak fd. */
+			if (fd >= 0)
+				close(fd);
+		} else {
+			pid = getpid();
+			fprintf(fp, "%d\n", pid);
+			fclose(fp);
 		}
 	}
-	return 0;
 }
static void alert(char *event, char *dev, char *disc, struct alert_info *info)
if (!info->alert_cmd && !info->mailaddr && !info->dosyslog) {
time_t now = time(0);
- printf("%1.15s: %s on %s %s\n", ctime(&now)+4, event, dev, disc?disc:"unknown device");
+ printf("%1.15s: %s on %s %s\n", ctime(&now) + 4,
+ event, dev, disc?disc:"unknown device");
}
if (info->alert_cmd) {
int pid = fork();
exit(2);
}
}
- if (info->mailaddr &&
- (strncmp(event, "Fail", 4)==0 ||
- strncmp(event, "Test", 4)==0 ||
- strncmp(event, "Spares", 6)==0 ||
- strncmp(event, "Degrade", 7)==0)) {
+ if (info->mailaddr && (strncmp(event, "Fail", 4) == 0 ||
+ strncmp(event, "Test", 4) == 0 ||
+ strncmp(event, "Spares", 6) == 0 ||
+ strncmp(event, "Degrade", 7) == 0)) {
FILE *mp = popen(Sendmail, "w");
if (mp) {
FILE *mdstat;
if (info->mailfrom)
fprintf(mp, "From: %s\n", info->mailfrom);
else
- fprintf(mp, "From: %s monitoring <root>\n", Name);
+ fprintf(mp, "From: %s monitoring <root>\n",
+ Name);
fprintf(mp, "To: %s\n", info->mailaddr);
fprintf(mp, "Subject: %s event on %s:%s\n\n",
event, dev, hname);
int n;
fprintf(mp,
"\nP.S. The /proc/mdstat file currently contains the following:\n\n");
- while ( (n=fread(buf, 1, sizeof(buf), mdstat)) > 0)
- n=fwrite(buf, 1, n, mp);
+ while ((n = fread(buf, 1, sizeof(buf),
+ mdstat)) > 0)
+ n = fwrite(buf, 1, n, mp);
fclose(mdstat);
}
pclose(mp);
/* Log at a different severity depending on the event.
*
* These are the critical events: */
- if (strncmp(event, "Fail", 4)==0 ||
- strncmp(event, "Degrade", 7)==0 ||
- strncmp(event, "DeviceDisappeared", 17)==0)
+ if (strncmp(event, "Fail", 4) == 0 ||
+ strncmp(event, "Degrade", 7) == 0 ||
+ strncmp(event, "DeviceDisappeared", 17) == 0)
priority = LOG_CRIT;
/* Good to know about, but are not failures: */
- else if (strncmp(event, "Rebuild", 7)==0 ||
- strncmp(event, "MoveSpare", 9)==0 ||
+ else if (strncmp(event, "Rebuild", 7) == 0 ||
+ strncmp(event, "MoveSpare", 9) == 0 ||
strncmp(event, "Spares", 6) != 0)
priority = LOG_WARNING;
/* Everything else: */
int last_disk;
int new_array = 0;
int retval;
+ int is_container = 0;
+ unsigned long redundancy_only_flags = 0;
if (test)
alert("TestMessage", dev, NULL, ainfo);
if (fd < 0)
goto disappeared;
- if (!md_array_active(fd))
+ if (st->devnm[0] == 0)
+ strcpy(st->devnm, fd2devnm(fd));
+
+ for (mse2 = mdstat; mse2; mse2 = mse2->next)
+ if (strcmp(mse2->devnm, st->devnm) == 0) {
+ mse2->devnm[0] = 0; /* flag it as "used" */
+ mse = mse2;
+ }
+
+ if (!mse) {
+ /* duplicated array in statelist
+ * or re-created after reading mdstat
+ */
+ st->err++;
+ goto out;
+ }
+
+ if (mse->level == NULL)
+ is_container = 1;
+
+ if (!is_container && !md_array_active(fd))
goto disappeared;
fcntl(fd, F_SETFD, FD_CLOEXEC);
if (md_get_array_info(fd, &array) < 0)
goto disappeared;
- if (st->devnm[0] == 0)
- strcpy(st->devnm, fd2devnm(fd));
+ if (!is_container && map_name(pers, mse->level) > 0)
+ redundancy_only_flags |= GET_MISMATCH;
+
+ sra = sysfs_read(-1, st->devnm, GET_LEVEL | GET_DISKS | GET_DEVS |
+ GET_STATE | redundancy_only_flags);
- sra = sysfs_read(-1, st->devnm, GET_LEVEL | GET_MISMATCH);
if (!sra)
goto disappeared;
goto out;
}
- for (mse2 = mdstat ; mse2 ; mse2=mse2->next)
- if (strcmp(mse2->devnm, st->devnm) == 0) {
- mse2->devnm[0] = 0; /* flag it as "used" */
- mse = mse2;
- }
-
- if (!mse) {
- /* duplicated array in statelist
- * or re-created after reading mdstat*/
- st->err++;
- close(fd);
- goto out;
- }
/* this array is in /proc/mdstat */
if (array.utime == 0)
/* external arrays don't update utime, so
st->err = 0;
st->percent = RESYNC_NONE;
new_array = 1;
- alert("NewArray", st->devname, NULL, ainfo);
+ if (!is_container)
+ alert("NewArray", st->devname, NULL, ainfo);
}
- if (st->utime == array.utime && st->failed == array.failed_disks &&
- st->working == array.working_disks &&
- st->spare == array.spare_disks &&
- (mse == NULL || (mse->percent == st->percent))) {
+ if (st->utime == array.utime && st->failed == sra->array.failed_disks &&
+ st->working == sra->array.working_disks &&
+ st->spare == sra->array.spare_disks &&
+ (mse == NULL || (mse->percent == st->percent))) {
if ((st->active < st->raid) && st->spare == 0)
retval = 1;
goto out;
mse->pattern && strchr(mse->pattern, '_') /* degraded */)
alert("DegradedArray", dev, NULL, ainfo);
- if (st->utime == 0 && /* new array */
- st->expected_spares > 0 && array.spare_disks < st->expected_spares)
+ if (st->utime == 0 && /* new array */ st->expected_spares > 0 &&
+ sra->array.spare_disks < st->expected_spares)
alert("SparesMissing", dev, NULL, ainfo);
if (st->percent < 0 && st->percent != RESYNC_UNKNOWN &&
mse->percent >= 0)
alert("RebuildStarted", dev, NULL, ainfo);
if (st->percent >= 0 && mse->percent >= 0 &&
(mse->percent / increments) > (st->percent / increments)) {
- char percentalert[15];
+ char percentalert[18];
/*
* "RebuildNN" (10 chars) or "RebuildStarted" (15 chars)
*/
char cnt[80];
snprintf(cnt, sizeof(cnt),
" mismatches found: %d (on raid level %d)",
- sra->mismatch_cnt, sra->array.level);
+ sra->mismatch_cnt, sra->array.level);
alert("RebuildFinished", dev, cnt, ainfo);
} else
alert("RebuildFinished", dev, NULL, ainfo);
}
st->percent = mse->percent;
- remaining_disks = array.nr_disks;
+ remaining_disks = sra->array.nr_disks;
for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
mdu_disk_info_t disc;
disc.number = i;
strncmp(mse->metadata_version, "external:", 9) == 0 &&
is_subarray(mse->metadata_version+9)) {
char *sl;
- strcpy(st->parent_devnm, mse->metadata_version+10);
+ strcpy(st->parent_devnm, mse->metadata_version + 10);
sl = strchr(st->parent_devnm, '/');
if (sl)
*sl = 0;
if (st->metadata == NULL && st->parent_devnm[0] == 0)
st->metadata = super_by_fd(fd, NULL);
- for (i=0; i<MAX_DISKS; i++) {
- mdu_disk_info_t disc = {0,0,0,0,0};
- int newstate=0;
+ for (i = 0; i < MAX_DISKS; i++) {
+ mdu_disk_info_t disc = {0, 0, 0, 0, 0};
+ int newstate = 0;
int change;
char *dv = NULL;
disc.number = i;
st->devstate[i] = newstate;
st->devid[i] = makedev(disc.major, disc.minor);
}
- st->active = array.active_disks;
- st->working = array.working_disks;
- st->spare = array.spare_disks;
- st->failed = array.failed_disks;
+ st->active = sra->array.active_disks;
+ st->working = sra->array.working_disks;
+ st->spare = sra->array.spare_disks;
+ st->failed = sra->array.failed_disks;
st->utime = array.utime;
- st->raid = array.raid_disks;
+ st->raid = sra->array.raid_disks;
st->err = 0;
if ((st->active < st->raid) && st->spare == 0)
retval = 1;
out:
if (sra)
sysfs_free(sra);
- if (fd > 0)
+ if (fd >= 0)
close(fd);
return retval;
disappeared:
- if (!st->err)
+ if (!st->err && !is_container)
alert("DeviceDisappeared", dev, NULL, ainfo);
st->err++;
goto out;
int new_found = 0;
char *name;
- for (mse=mdstat; mse; mse=mse->next)
- if (mse->devnm[0] &&
- (!mse->level || /* retrieve containers */
- (strcmp(mse->level, "raid0") != 0 &&
- strcmp(mse->level, "linear") != 0))
- ) {
+ for (mse = mdstat; mse; mse = mse->next)
+ if (mse->devnm[0] && (!mse->level || /* retrieve containers */
+ (strcmp(mse->level, "raid0") != 0 &&
+ strcmp(mse->level, "linear") != 0))) {
struct state *st = xcalloc(1, sizeof *st);
mdu_array_info_t array;
int fd;
st->percent = RESYNC_UNKNOWN;
st->expected_spares = -1;
if (mse->metadata_version &&
- strncmp(mse->metadata_version, "external:", 9) == 0 &&
+ strncmp(mse->metadata_version,
+ "external:", 9) == 0 &&
is_subarray(mse->metadata_version+9)) {
char *sl;
strcpy(st->parent_devnm,
{
int fd;
- if (!st->metadata ||
- !st->metadata->ss->get_spare_criteria) {
+ if (!st->metadata || !st->metadata->ss->get_spare_criteria) {
sc->min_size = 0;
sc->sector_size = 0;
return 0;
}
static dev_t choose_spare(struct state *from, struct state *to,
- struct domainlist *domlist, struct spare_criteria *sc)
+ struct domainlist *domlist, struct spare_criteria *sc)
{
int d;
dev_t dev = 0;
for (d = from->raid; !dev && d < MAX_DISKS; d++) {
- if (from->devid[d] > 0 &&
- from->devstate[d] == 0) {
+ if (from->devid[d] > 0 && from->devstate[d] == 0) {
struct dev_policy *pol;
unsigned long long dev_size;
unsigned int dev_sector_size;
if (from->spare_group)
pol_add(&pol, pol_domain,
from->spare_group, NULL);
- if (domain_test(domlist, pol, to->metadata->ss->name) == 1)
+ if (domain_test(domlist, pol,
+ to->metadata->ss->name) == 1)
dev = from->devid[d];
dev_policy_free(pol);
}
}
dp = list->devs;
while (dp) {
- if (dp->disk.state & (1<<MD_DISK_SYNC) &&
- !(dp->disk.state & (1<<MD_DISK_FAULTY)))
+ if (dp->disk.state & (1 << MD_DISK_SYNC) &&
+ !(dp->disk.state & (1 << MD_DISK_FAULTY)))
active_cnt++;
dp = dp->next;
}
link_containers_with_subarrays(statelist);
for (st = statelist; st; st = st->next)
- if (st->active < st->raid &&
- st->spare == 0 && !st->err) {
+ if (st->active < st->raid && st->spare == 0 && !st->err) {
struct domainlist *domlist = NULL;
int d;
struct state *to = st;
else
devid = choose_spare(from, to, domlist,
&sc);
- if (devid > 0
- && move_spare(from->devname, to->devname, devid)) {
- alert("MoveSpare", to->devname, from->devname, info);
+ if (devid > 0 &&
+ move_spare(from->devname, to->devname,
+ devid)) {
+ alert("MoveSpare", to->devname,
+ from->devname, info);
break;
}
}
for (st = list; st; st = st->next)
if (st->parent_devnm[0])
for (cont = list; cont; cont = cont->next)
- if (!cont->err &&
- cont->parent_devnm[0] == 0 &&
+ if (!cont->err && cont->parent_devnm[0] == 0 &&
strcmp(cont->devnm, st->parent_devnm) == 0) {
st->parent = cont;
st->subarray = cont->subarray;
int Wait(char *dev)
{
char devnm[32];
+ dev_t rdev;
+ char *tmp;
int rv = 1;
int frozen_remaining = 3;
- if (!stat_is_blkdev(dev, NULL))
+ if (!stat_is_blkdev(dev, &rdev))
+ return 2;
+
+ tmp = devid2devnm(rdev);
+ if (!tmp) {
+ pr_err("Cannot get md device name.\n");
return 2;
- strcpy(devnm, dev);
+ }
+
+ strcpy(devnm, tmp);
while(1) {
struct mdstat_ent *ms = mdstat_read(1, 0);
struct mdstat_ent *e;
- for (e=ms ; e; e=e->next)
+ for (e = ms; e; e = e->next)
if (strcmp(e->devnm, devnm) == 0)
break;
}
}
+/* The state "broken" is used only for RAID0/LINEAR - it's the same as
+ * "clean", but used in case the array has one or more members missing.
+ *
+ * WaitClean() compares the sysfs_match_word() result against
+ * ARRAY_SIZE(clean_states) - 1, so the NULL terminator must stay last.
+ */
static char *clean_states[] = {
- "clear", "inactive", "readonly", "read-auto", "clean", NULL };
+ "clear", "inactive", "readonly", "read-auto", "clean", "broken", NULL };
-int WaitClean(char *dev, int sock, int verbose)
+int WaitClean(char *dev, int verbose)
{
int fd;
struct mdinfo *mdi;
rv = read(state_fd, buf, sizeof(buf));
if (rv < 0)
break;
- if (sysfs_match_word(buf, clean_states) <= 4)
+ if (sysfs_match_word(buf, clean_states) <
+ (int)ARRAY_SIZE(clean_states) - 1)
break;
rv = sysfs_wait(state_fd, &delay);
if (rv < 0 && errno != EINTR)
}
if (rv < 0)
rv = 1;
- else if (fping_monitor(sock) == 0 ||
- ping_monitor(mdi->text_version) == 0) {
+ else if (ping_monitor(mdi->text_version) == 0) {
/* we need to ping to close the window between array
* state transitioning to clean and the metadata being
* marked clean
*/
rv = 0;
- } else
+ } else {
rv = 1;
+ pr_err("Error connecting monitor with %s\n", dev);
+ }
if (rv && verbose)
- pr_err("Error waiting for %s to be clean\n",
- dev);
+ pr_err("Error waiting for %s to be clean\n", dev);
/* restore the original safe_mode_delay */
sysfs_set_safemode(mdi, mdi->safe_mode_delay);