git.ipfire.org Git - thirdparty/lxc.git/commitdiff
make monitor/monitord more resilient to unexpected termination
author Dwight Engen <dwight.engen@oracle.com>
Wed, 2 Apr 2014 17:12:38 +0000 (13:12 -0400)
committer Serge Hallyn <serge.hallyn@ubuntu.com>
Wed, 2 Apr 2014 17:30:29 +0000 (12:30 -0500)
Reported-by: Florian Klink <flokli@flokli.de>
Signed-off-by: Dwight Engen <dwight.engen@oracle.com>
Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
src/lxc/lxc_monitord.c
src/lxc/monitor.c

index f6d99d57e6bc4d0f7546af78821878c6be7e6a29..8f7e6b0a25a33ad6c2a9ab0789010841ea4e7675 100644 (file)
@@ -75,6 +75,7 @@ static int quit;
 
 static int lxc_monitord_fifo_create(struct lxc_monitor *mon)
 {
+       struct flock lk;
        char fifo_path[PATH_MAX];
        int ret;
 
@@ -83,8 +84,8 @@ static int lxc_monitord_fifo_create(struct lxc_monitor *mon)
                return ret;
 
        ret = mknod(fifo_path, S_IFIFO|S_IRUSR|S_IWUSR, 0);
-       if (ret < 0) {
-               INFO("monitor fifo %s exists, already running?", fifo_path);
+       if (ret < 0 && errno != EEXIST) {
+               INFO("failed to mknod monitor fifo %s %s", fifo_path, strerror(errno));
                return -1;
        }
 
@@ -94,6 +95,17 @@ static int lxc_monitord_fifo_create(struct lxc_monitor *mon)
                ERROR("failed to open monitor fifo");
                return -1;
        }
+
+       lk.l_type = F_WRLCK;
+       lk.l_whence = SEEK_SET;
+       lk.l_start = 0;
+       lk.l_len = 0;
+       if (fcntl(mon->fifofd, F_SETLK, &lk) != 0) {
+               /* another lxc-monitord is already running, don't start up */
+               DEBUG("lxc-monitord already running on lxcpath %s", mon->lxcpath);
+               close(mon->fifofd);
+               return -1;
+       }
        return 0;
 }
 
@@ -264,8 +276,8 @@ static void lxc_monitord_delete(struct lxc_monitor *mon)
        lxc_monitord_sock_delete(mon);
 
        lxc_mainloop_del_handler(&mon->descr, mon->fifofd);
-       close(mon->fifofd);
        lxc_monitord_fifo_delete(mon);
+       close(mon->fifofd);
 
        for (i = 0; i < mon->clientfds_cnt; i++) {
                lxc_mainloop_del_handler(&mon->descr, mon->clientfds[i]);
@@ -401,7 +413,7 @@ int main(int argc, char *argv[])
                goto out;
        }
 
-       NOTICE("monitoring lxcpath %s", mon.lxcpath);
+       NOTICE("pid:%d monitoring lxcpath %s", getpid(), mon.lxcpath);
        for(;;) {
                ret = lxc_mainloop(&mon.descr, 1000 * 30);
                if (mon.clientfds_cnt <= 0)
index e45b5cfb16f5d82d1adb1fdf41e18cf0d4cbf591..4ca4000cd5b90644e1b8c582d348089a4d0d39d1 100644 (file)
@@ -95,14 +95,20 @@ static void lxc_monitor_fifo_send(struct lxc_msg *msg, const char *lxcpath)
        if (ret < 0)
                return;
 
-       fd = open(fifo_path, O_WRONLY);
+       /* open the fifo nonblock in case the monitor is dead, we don't want
+        * the open to wait for a reader since it may never come.
+        */
+       fd = open(fifo_path, O_WRONLY|O_NONBLOCK);
        if (fd < 0) {
-               /* it is normal for this open to fail when there is no monitor
-                * running, so we don't log it
+               /* it is normal for this open to fail ENXIO when there is no
+                * monitor running, so we don't log it
                 */
                return;
        }
 
+       if (fcntl(fd, F_SETFL, O_WRONLY) < 0)
+               return;
+
        ret = write(fd, msg, sizeof(*msg));
        if (ret != sizeof(*msg)) {
                close(fd);