git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - managemon.c
mdmon: periodically retry to create the socket
[thirdparty/mdadm.git] / managemon.c
index 9612007752e1cd6f1771b45629fea2313e6b4780..730334cd6d278a2770caeebb6e52d0185f256b63 100644 (file)
@@ -265,8 +265,11 @@ static void manage_container(struct mdstat_ent *mdstat,
                 * These need to be removed from, or added to, the array
                 */
                mdi = sysfs_read(-1, mdstat->devnum, GET_DEVS);
-               if (!mdi)
+               if (!mdi) {
+                       /* invalidate the current count so we can try again */
+                       container->devcnt = -1;
                        return;
+               }
 
                /* check for removals */
                for (cdp = &container->devs; *cdp; ) {
@@ -343,7 +346,11 @@ static void manage_member(struct mdstat_ent *mdstat,
                                struct mdinfo *newd;
                                if (sysfs_add_disk(&newa->info, d) < 0)
                                        continue;
-                               newd = newa->info.devs;
+                               newd = malloc(sizeof(*newd));
+                               *newd = *d;
+                               newd->next = newa->info.devs;
+                               newa->info.devs = newd;
+
                                newd->state_fd = sysfs_open(a->devnum,
                                                            newd->sys_name,
                                                            "state");
@@ -460,6 +467,7 @@ static void manage_new(struct mdstat_ent *mdstat,
        new->action_fd = sysfs_open(new->devnum, NULL, "sync_action");
        new->info.state_fd = sysfs_open(new->devnum, NULL, "array_state");
        new->resync_start_fd = sysfs_open(new->devnum, NULL, "resync_start");
+       new->metadata_fd = sysfs_open(new->devnum, NULL, "metadata_version");
        get_resync_start(new);
        dprintf("%s: inst: %d action: %d state: %d\n", __func__, atoi(inst),
                new->action_fd, new->info.state_fd);
@@ -474,8 +482,13 @@ static void manage_new(struct mdstat_ent *mdstat,
                        mdstat->metadata_version);
                new->container = NULL;
                free_aa(new);
-       } else
+       } else {
                replace_array(container, victim, new);
+               if (failed) {
+                       new->check_degraded = 1;
+                       manage_member(mdstat, new);
+               }
+       }
 }
 
 void manage(struct mdstat_ent *mdstat, struct supertype *container)
@@ -492,7 +505,8 @@ void manage(struct mdstat_ent *mdstat, struct supertype *container)
                        continue;
                }
                if (mdstat->metadata_version == NULL ||
-                   strncmp(mdstat->metadata_version, "external:/", 10) != 0 ||
+                   strncmp(mdstat->metadata_version, "external:", 9) != 0 ||
+                   !is_subarray(mdstat->metadata_version+9) ||
                    strncmp(mdstat->metadata_version+10, container->devname,
                            strlen(container->devname)) != 0 ||
                    mdstat->metadata_version[10+strlen(container->devname)]
@@ -518,14 +532,15 @@ static void handle_message(struct supertype *container, struct metadata_update *
 
        struct metadata_update *mu;
 
-       if (msg->len == 0) {
-               int cnt;
-               
+       if (msg->len <= 0)
                while (update_queue_pending || update_queue) {
                        check_update_queue(container);
                        usleep(15*1000);
                }
 
+       if (msg->len == 0) { /* ping_monitor */
+               int cnt;
+               
                cnt = monitor_loop_cnt;
                if (cnt & 1)
                        cnt += 2; /* wait until next pselect */
@@ -535,6 +550,11 @@ static void handle_message(struct supertype *container, struct metadata_update *
 
                while (monitor_loop_cnt - cnt < 0)
                        usleep(10 * 1000);
+       } else if (msg->len == -1) { /* ping_manager */
+               struct mdstat_ent *mdstat = mdstat_read(1, 0);
+
+               manage(mdstat, container);
+               free_mdstat(mdstat);
        } else {
                mu = malloc(sizeof(*mu));
                mu->len = msg->len;
@@ -586,9 +606,13 @@ void do_manager(struct supertype *container)
 {
        struct mdstat_ent *mdstat;
        sigset_t set;
+       int proc_fd;
 
        sigprocmask(SIG_UNBLOCK, NULL, &set);
        sigdelset(&set, SIGUSR1);
+       sigdelset(&set, SIGHUP);
+       sigdelset(&set, SIGALRM);
+       proc_fd = open("/proc/mounts", O_RDONLY);
 
        do {
 
@@ -606,6 +630,15 @@ void do_manager(struct supertype *container)
 
                        read_sock(container);
 
+                       if (container->sock < 0 || socket_hup_requested) {
+                               close(container->sock);
+                               container->sock = make_control_sock(container->devname);
+                               make_pidfile(container->devname, 0);
+                               socket_hup_requested = 0;
+                       }
+                       if (container->sock < 0)
+                               alarm(30);
+
                        free_mdstat(mdstat);
                }
                remove_old();
@@ -614,9 +647,12 @@ void do_manager(struct supertype *container)
 
                manager_ready = 1;
 
-               if (update_queue == NULL)
-                       mdstat_wait_fd(container->sock, &set);
-               else
+               if (update_queue == NULL) {
+                       if (container->sock < 0)
+                               mdstat_wait_fd(proc_fd, &set);
+                       else
+                               mdstat_wait_fd(container->sock, &set);
+               } else
                        /* If an update is happening, just wait for signal */
                        pselect(0, NULL, NULL, NULL, NULL, &set);
        } while(1);