git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - managemon.c
Get failed disk count from array state
[thirdparty/mdadm.git] / managemon.c
index 40c863f1edd57d0622d0c0eb195be4930e344767..68f0c2d3f1a5873b8ecd9bf084afd6695002eac3 100644 (file)
@@ -115,6 +115,8 @@ static void close_aa(struct active_array *aa)
        for (d = aa->info.devs; d; d = d->next) {
                close(d->recovery_fd);
                close(d->state_fd);
+               close(d->bb_fd);
+               close(d->ubb_fd);
        }
 
        if (aa->action_fd >= 0)
@@ -134,7 +136,7 @@ static void free_aa(struct active_array *aa)
        /* Note that this doesn't close fds if they are being used
         * by a clone.  ->container will be set for a clone
         */
-       dprintf("%s: sys_name: %s\n", __func__, aa->info.sys_name);
+       dprintf("sys_name: %s\n", aa->info.sys_name);
        if (!aa->container)
                close_aa(aa);
        while (aa->info.devs) {
@@ -273,8 +275,7 @@ static void add_disk_to_container(struct supertype *st, struct mdinfo *sd)
                .state = 0,
        };
 
-       dprintf("%s: add %d:%d to container\n",
-               __func__, sd->disk.major, sd->disk.minor);
+       dprintf("add %d:%d to container\n", sd->disk.major, sd->disk.minor);
 
        sd->next = st->devs;
        st->devs = sd;
@@ -289,7 +290,7 @@ static void add_disk_to_container(struct supertype *st, struct mdinfo *sd)
         */
        st2 = dup_super(st);
        if (st2->ss->load_super(st2, dfd, NULL) == 0) {
-               st2->ss->getinfo_super(st, &info, NULL);
+               st2->ss->getinfo_super(st2, &info, NULL);
                if (st->ss->compare_super(st, st2) == 0 &&
                    info.disk.raid_disk >= 0) {
                        /* Looks like a good member of array.
@@ -325,8 +326,8 @@ static void remove_disk_from_container(struct supertype *st, struct mdinfo *sd)
                .raid_disk = -1,
                .state = 0,
        };
-       dprintf("%s: remove %d:%d from container\n",
-               __func__, sd->disk.major, sd->disk.minor);
+       dprintf("remove %d:%d from container\n",
+               sd->disk.major, sd->disk.minor);
 
        st->update_tail = &update;
        st->ss->remove_from_super(st, &dk);
@@ -402,6 +403,22 @@ static void manage_container(struct mdstat_ent *mdstat,
        }
 }
 
+static int sysfs_open2(char *devnum, char *name, char *attr)
+{
+       int fd = sysfs_open(devnum, name, attr);
+       if (fd >= 0) {
+               /* Open a sysfs attribute via sysfs_open() and, when the
+                * open succeeded, prime it with one throw-away read of
+                * up to sizeof(buf) bytes.
+                *
+                * seq_file in the kernel allocates buffer space
+                * on the first read.  Do that now so 'monitor'
+                * never needs to.
+                *
+                * Returns whatever sysfs_open() returned; a failed
+                * priming read does not change the result.
+                * NOTE(review): the priming read presumably leaves the
+                * file offset advanced, so later readers are expected
+                * to seek back to the start - confirm against callers.
+                */
+               char buf[200];
+               if (read(fd, buf, sizeof(buf)) < 0)
+                       /* pretend not to ignore return value; both the
+                        * data read and the outcome of the read are
+                        * unused, so fd is returned either way
+                        */
+                       return fd;
+       }
+       return fd;
+}
+
 static int disk_init_and_add(struct mdinfo *disk, struct mdinfo *clone,
                             struct active_array *aa)
 {
@@ -409,14 +426,30 @@ static int disk_init_and_add(struct mdinfo *disk, struct mdinfo *clone,
                return -1;
 
        *disk = *clone;
-       disk->recovery_fd = sysfs_open(aa->info.sys_name, disk->sys_name, "recovery_start");
+       disk->recovery_fd = sysfs_open2(aa->info.sys_name, disk->sys_name,
+                                       "recovery_start");
        if (disk->recovery_fd < 0)
                return -1;
-       disk->state_fd = sysfs_open(aa->info.sys_name, disk->sys_name, "state");
+       disk->state_fd = sysfs_open2(aa->info.sys_name, disk->sys_name, "state");
        if (disk->state_fd < 0) {
                close(disk->recovery_fd);
                return -1;
        }
+       disk->bb_fd = sysfs_open2(aa->info.sys_name, disk->sys_name,
+                                "bad_blocks");
+       if (disk->bb_fd < 0) {
+               close(disk->recovery_fd);
+               close(disk->state_fd);
+               return -1;
+       }
+       disk->ubb_fd = sysfs_open2(aa->info.sys_name, disk->sys_name,
+                                 "unacknowledged_bad_blocks");
+       if (disk->ubb_fd < 0) {
+               close(disk->recovery_fd);
+               close(disk->state_fd);
+               close(disk->bb_fd);
+               return -1;
+       }
        disk->prev_state = read_dev_state(disk->state_fd);
        disk->curr_state = disk->prev_state;
        disk->next = aa->info.devs;
@@ -450,9 +483,11 @@ static void manage_member(struct mdstat_ent *mdstat,
                /* Raced with something */
                return;
 
-       // FIXME
-       a->info.array.raid_disks = mdstat->raid_disks;
-       // MORE
+       if (mdstat->active) {
+               // FIXME
+               a->info.array.raid_disks = mdstat->raid_disks;
+               // MORE
+       }
 
        if (sysfs_get_ll(&a->info, NULL, "component_size", &component_size) >= 0)
                a->info.component_size = component_size << 1;
@@ -464,9 +499,9 @@ static void manage_member(struct mdstat_ent *mdstat,
                frozen = 1; /* can't read metadata_version assume the worst */
 
        /* If sync_action is not 'idle' then don't try recovery now */
-       if (!frozen
-           && sysfs_get_str(&a->info, NULL, "sync_action", buf, sizeof(buf)) > 0
-           && strncmp(buf, "idle", 4) != 0)
+       if (!frozen &&
+           sysfs_get_str(&a->info, NULL, "sync_action",
+                         buf, sizeof(buf)) > 0 && strncmp(buf, "idle", 4) != 0)
                frozen = 1;
 
        if (mdstat->level) {
@@ -492,6 +527,11 @@ static void manage_member(struct mdstat_ent *mdstat,
        if (a->container == NULL)
                return;
 
+       if (sigterm && a->info.safe_mode_delay != 1) {
+               sysfs_set_safemode(&a->info, 1);
+               a->info.safe_mode_delay = 1;
+       }
+
        /* We don't check the array while any update is pending, as it
         * might container a change (such as a spare assignment) which
         * could affect our decisions.
@@ -518,7 +558,7 @@ static void manage_member(struct mdstat_ent *mdstat,
                /* prevent the kernel from activating the disk(s) before we
                 * finish adding them
                 */
-               dprintf("%s: freezing %s\n", __func__,  a->info.sys_name);
+               dprintf("freezing %s\n", a->info.sys_name);
                sysfs_set_str(&a->info, NULL, "sync_action", "frozen");
 
                /* Add device to array and set offset/size/slot.
@@ -540,11 +580,10 @@ static void manage_member(struct mdstat_ent *mdstat,
                        usleep(15*1000);
                }
                replace_array(container, a, newa);
-               if (sysfs_set_str(&a->info, NULL, "sync_action", "recover")
-                   == 0)
+               if (sysfs_set_str(&a->info, NULL,
+                                 "sync_action", "recover") == 0)
                        newa->prev_action = recover;
-               dprintf("%s: recovery started on %s\n", __func__,
-                       a->info.sys_name);
+               dprintf("recovery started on %s\n", a->info.sys_name);
  out:
                while (newdev) {
                        d = newdev->next;
@@ -587,8 +626,8 @@ static void manage_member(struct mdstat_ent *mdstat,
                        newd = xmalloc(sizeof(*newd));
                        disk_init_and_add(newd, d, newa);
                }
-               if (sysfs_get_ll(info, NULL, "array_size", &array_size) == 0
-                   && a->info.custom_array_size > array_size*2) {
+               if (sysfs_get_ll(info, NULL, "array_size", &array_size) == 0 &&
+                   a->info.custom_array_size > array_size*2) {
                        sysfs_set_num(info, NULL, "array_size",
                                      a->info.custom_array_size/2);
                }
@@ -646,7 +685,8 @@ static void manage_new(struct mdstat_ent *mdstat,
 
        mdi = sysfs_read(-1, mdstat->devnm,
                         GET_LEVEL|GET_CHUNK|GET_DISKS|GET_COMPONENT|
-                        GET_DEGRADED|GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE);
+                        GET_SAFEMODE|GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
+                        GET_LAYOUT);
 
        if (!mdi)
                return;
@@ -684,14 +724,25 @@ static void manage_new(struct mdstat_ent *mdstat,
                }
        }
 
-       new->action_fd = sysfs_open(new->info.sys_name, NULL, "sync_action");
-       new->info.state_fd = sysfs_open(new->info.sys_name, NULL, "array_state");
-       new->resync_start_fd = sysfs_open(new->info.sys_name, NULL, "resync_start");
-       new->metadata_fd = sysfs_open(new->info.sys_name, NULL, "metadata_version");
-       new->sync_completed_fd = sysfs_open(new->info.sys_name, NULL, "sync_completed");
-       dprintf("%s: inst: %d action: %d state: %d\n", __func__, atoi(inst),
+       new->action_fd = sysfs_open2(new->info.sys_name, NULL, "sync_action");
+       new->info.state_fd = sysfs_open2(new->info.sys_name, NULL, "array_state");
+       new->resync_start_fd = sysfs_open2(new->info.sys_name, NULL, "resync_start");
+       new->metadata_fd = sysfs_open2(new->info.sys_name, NULL, "metadata_version");
+       new->sync_completed_fd = sysfs_open2(new->info.sys_name, NULL, "sync_completed");
+
+       dprintf("inst: %s action: %d state: %d\n", inst,
                new->action_fd, new->info.state_fd);
 
+       if (sigterm)
+               new->info.safe_mode_delay = 1;
+       else if (mdi->safe_mode_delay >= 50)
+               /* Normal start, mdadm set this. */
+               new->info.safe_mode_delay = mdi->safe_mode_delay;
+       else
+               /* Restart, just pick a number */
+               new->info.safe_mode_delay = 5000;
+       sysfs_set_safemode(&new->info, new->info.safe_mode_delay);
+
        /* reshape_position is set by mdadm in sysfs
         * read this information for new arrays only (empty victim)
         */
@@ -801,7 +852,8 @@ static void handle_message(struct supertype *container, struct metadata_update *
                mu->space_list = NULL;
                mu->next = NULL;
                if (container->ss->prepare_update)
-                       container->ss->prepare_update(container, mu);
+                       if (!container->ss->prepare_update(container, mu))
+                               free_updates(&mu);
                queue_metadata_update(mu);
        }
 }