handle Manage_subdevs() for 'external' arrays
author Dan Williams <dan.j.williams@intel.com>
Thu, 15 May 2008 06:48:35 +0000 (16:48 +1000)
committer Neil Brown <neilb@suse.de>
Thu, 15 May 2008 06:48:35 +0000 (16:48 +1000)
From: Dan Williams <dan.j.williams@intel.com>

1/ Block attempts to add/remove devices from container members
2/ Forward add/remove requests to containers

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Makefile
Manage.c
mdadm.h
msg.c [new file with mode: 0644]
msg.h [new file with mode: 0644]
util.c

index 47654b9..46d7594 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -70,12 +70,12 @@ OBJS =  mdadm.o config.o mdstat.o  ReadMe.o util.o Manage.o Assemble.o Build.o \
        Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \
        Incremental.o \
        mdopen.o super0.o super1.o super-ddf.o super-intel.o bitmap.o \
-       restripe.o sysfs.o sha1.o mapfile.o crc32.o sg_io.o
+       restripe.o sysfs.o sha1.o mapfile.o crc32.o sg_io.o msg.o
 SRCS =  mdadm.c config.c mdstat.c  ReadMe.c util.c Manage.c Assemble.c Build.c \
        Create.c Detail.c Examine.c Grow.c Monitor.c dlink.c Kill.c Query.c \
        Incremental.c \
        mdopen.c super0.c super1.c super-ddf.c super-intel.c bitmap.c \
-       restripe.c sysfs.c sha1.c mapfile.c crc32.c sg_io.c
+       restripe.c sysfs.c sha1.c mapfile.c crc32.c sg_io.c msg.c
 
 STATICSRC = pwgr.c
 STATICOBJS = pwgr.o
index 4202fd9..003d815 100644 (file)
--- a/Manage.c
+++ b/Manage.c
@@ -171,6 +171,54 @@ int Manage_reconfig(char *devname, int fd, int layout)
        return 0;
 }
 
+/* Ask the monitor of the container open on fd to add (add_remove != 0) or
+ * remove the device described by stb; returns 0 on success, 1 on failure. */
+static int
+add_remove_device_container(int fd, int add_remove, struct stat *stb)
+{
+       int devnum = fd2devnum(fd);
+       char *devname = devnum2devname(devnum);
+       int sfd = devname ? connect_monitor(devname) : -1;
+       struct md_message msg = { .buf = NULL };
+       int err;
+
+       if (devname && sfd < 0) {
+               fprintf(stderr, Name ": Cannot connect to monitor for %s: %s\n",
+                       devname, strerror(errno));
+               free(devname);
+               return 1;
+       } else if (sfd < 0) {
+               fprintf(stderr, Name ": Cannot determine container name for"
+                       " device number %d\n", devnum);
+               return 1;
+       }
+
+       /* NOTE(review): an add only sends an empty message (no add command is
+        * visible here); check it too, or we wait for a reply to nothing. */
+       if (add_remove)
+               err = ack(sfd, 0, 0) != 0;
+       else
+               err = send_remove_device(sfd, stb->st_rdev, 0, 0) != 0;
+       if (err) {
+               fprintf(stderr, Name ": Failed to send \'%s device\'"
+                       " message to the container monitor\n",
+                       add_remove ? "add" : "remove");
+       } else if (receive_message(sfd, &msg, 0) != 0) {
+               fprintf(stderr, Name ": Failed to receive an acknowledgement"
+                       " from the container monitor\n");
+               err = 1;
+       } else if (msg.seq != 0) {
+               fprintf(stderr, Name ": %s device failed error code %d\n",
+                       add_remove ? "Add" : "Remove", msg.seq);
+               err = 1;
+       }
+
+       free(msg.buf); /* receive_message() may have allocated a payload */
+       free(devname);
+       close(sfd);
+       return err;
+}
+
 int Manage_subdevs(char *devname, int fd,
                   mddev_dev_t devlist, int verbose)
 {
@@ -306,7 +354,13 @@ int Manage_subdevs(char *devname, int fd,
                        return 1;
                case 'a':
                        /* add the device */
-
+                       if (tst == &supertype_container_member) {
+                               fprintf(stderr, Name ": Cannot add disks to a"
+                                       " \'member\' array, perform this"
+                                       " operation on the parent container\n");
+                               return 1;
+                       } else if (tst->ss->external)
+                               return add_remove_device_container(fd, 1, &stb);
                        /* Make sure it isn't in use (in 2.6 or later) */
                        tfd = open(dv->devname, O_RDONLY|O_EXCL);
                        if (tfd < 0) {
@@ -497,6 +551,13 @@ int Manage_subdevs(char *devname, int fd,
 
                case 'r':
                        /* hot remove */
+                       if (tst == &supertype_container_member) {
+                               fprintf(stderr, Name ": Cannot remove disks from a"
+                                       " \'member\' array, perform this"
+                                       " operation on the parent container\n");
+                               return 1;
+                       } else if (tst->ss->external)
+                               return add_remove_device_container(fd, 0, &stb);
                        /* FIXME check that it is a current member */
                        if (ioctl(fd, HOT_REMOVE_DISK, (unsigned long)stb.st_rdev)) {
                                fprintf(stderr, Name ": hot remove failed "
diff --git a/mdadm.h b/mdadm.h
index 176d1c3..64f41fd 100644 (file)
--- a/mdadm.h
+++ b/mdadm.h
@@ -76,6 +76,7 @@ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence));
 #include       "md_u.h"
 #include       "md_p.h"
 #include       "bitmap.h"
+#include       "msg.h"
 
 #include <endian.h>
 /* Redhat don't like to #include <asm/byteorder.h>, and
@@ -407,6 +408,7 @@ struct supertype {
        void *info;
 };
 
+extern struct supertype supertype_container_member;
 extern struct supertype *super_by_fd(int fd);
 extern struct supertype *guess_super(int fd);
 extern struct supertype *dup_super(struct supertype *st);
diff --git a/msg.c b/msg.c
new file mode 100644 (file)
index 0000000..6082365
--- /dev/null
+++ b/msg.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright (C) 2008 Intel Corporation
+ *
+ *     mdmon socket / message handling
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include "mdadm.h"
+
+enum tx_rx_state {
+       TX_RX_START,            /* transferring the start magic */
+       TX_RX_SEQ,              /* transferring msg->seq */
+       TX_RX_NUM_BYTES,        /* transferring msg->num_bytes */
+       TX_RX_BUF,              /* transferring the msg->buf payload */
+       TX_RX_END,              /* transferring the end magic */
+       TX_RX_SUCCESS,          /* terminal; the loop runs while state < this */
+       TX_RX_ERR,              /* terminal: the transfer failed */
+};
+
+const int start_magic = 0x5a5aa5a5; /* word that opens every message */
+const int end_magic = 0xa5a55a5a; /* word that closes every message */
+/* recv() or send(), chosen by a 'recv_send' variable in the caller's scope */
+#define txrx(fd, buf, size, flags) (recv_send ? \
+       recv(fd, buf, size, flags) : \
+       send(fd, buf, size, flags))
+
+/* non-blocking send (recv_send == 0) or receive (recv_send != 0) of one message:
+ * start magic, seq, num_bytes, optional payload, end magic; n second timeout */
+static enum tx_rx_state
+tx_rx_message(int fd, struct md_message *msg, int recv_send, int tmo)
+{
+       int d = recv_send ? 0 : start_magic; /* scratch word for the magics */
+       int flags = recv_send ? 0 : MSG_NOSIGNAL;
+       enum tx_rx_state state = TX_RX_START;
+       void *buf = &d; /* field currently being transferred */
+       size_t size = sizeof(d); /* size of that field in bytes */
+       off_t n = 0; /* bytes of that field already transferred */
+       int rc;
+       int again; /* set when the call failed with EAGAIN */
+
+       do {
+               again = 0;
+               rc = txrx(fd, buf + n, size - n, flags);
+               if (rc <= 0) { /* error, or nothing transferred */
+                       if (rc == -1 && errno == EAGAIN)
+                               again = 1;
+                       else
+                               state = TX_RX_ERR;
+               } else if (rc + n == size) /* field complete: advance the state */
+                       switch (state) {
+                       case TX_RX_START:
+                               if (recv_send && d != start_magic)
+                                       state = TX_RX_ERR;
+                               else {
+                                       state = TX_RX_SEQ;
+                                       buf = &msg->seq;
+                                       size = sizeof(msg->seq);
+                                       n = 0;
+                               }
+                               break;
+                       case TX_RX_SEQ:
+                               state = TX_RX_NUM_BYTES;
+                               buf = &msg->num_bytes;
+                               size = sizeof(msg->num_bytes);
+                               n = 0;
+                               break;
+                       case TX_RX_NUM_BYTES:
+                               if (msg->num_bytes >
+                                   sizeof(union md_message_commands))
+                                       state = TX_RX_ERR; /* payload too large */
+                               else if (recv_send && msg->num_bytes) {
+                                       msg->buf = malloc(msg->num_bytes); /* not freed here */
+                                       if (!msg->buf)
+                                               state = TX_RX_ERR;
+                                       else {
+                                               state = TX_RX_BUF;
+                                               buf = msg->buf;
+                                               size = msg->num_bytes;
+                                               n = 0;
+                                       }
+                               } else if (!recv_send && msg->num_bytes) {
+                                       state = TX_RX_BUF;
+                                       buf = msg->buf;
+                                       size = msg->num_bytes;
+                                       n = 0;
+                               } else { /* no payload: straight to the end magic */
+                                       d = recv_send ? 0 : end_magic;
+                                       state = TX_RX_END;
+                                       buf = &d;
+                                       size = sizeof(d);
+                                       n = 0;
+                               }
+                               break;
+                       case TX_RX_BUF:
+                               d = recv_send ? 0 : end_magic;
+                               state = TX_RX_END;
+                               buf = &d;
+                               size = sizeof(d);
+                               n = 0;
+                               break;
+                       case TX_RX_END:
+                               if (recv_send && d != end_magic)
+                                       state = TX_RX_ERR;
+                               else
+                                       state = TX_RX_SUCCESS;
+                               break;
+                       case TX_RX_ERR:
+                       case TX_RX_SUCCESS:
+                               break;
+                       }
+               else /* partial transfer: continue with the rest of the field */
+                       n += rc;
+
+               if (again) { /* wait until fd is ready before retrying */
+                       fd_set set;
+                       struct timeval timeout = { tmo, 0 }; /* tmo seconds */
+                       struct timeval *ptmo = tmo ? &timeout : NULL; /* 0: no timeout */
+
+                       FD_ZERO(&set);
+                       FD_SET(fd, &set);
+
+                       if (recv_send)
+                               rc = select(fd + 1, &set, NULL, NULL, ptmo);
+                       else
+                               rc = select(fd + 1, NULL, &set, NULL, ptmo);
+
+                       if (rc <= 0) /* timed out, or select() failed */
+                               state = TX_RX_ERR;
+               }
+       } while (state < TX_RX_SUCCESS);
+
+       return state; /* TX_RX_SUCCESS or TX_RX_ERR */
+}
+
+/* Receive one message from fd into msg; 0 on success, -1 on any failure. */
+int receive_message(int fd, struct md_message *msg, int tmo)
+{
+       enum tx_rx_state outcome = tx_rx_message(fd, msg, 1, tmo);
+
+       return outcome == TX_RX_SUCCESS ? 0 : -1;
+}
+
+/* Send the message described by msg over fd; 0 on success, -1 on failure. */
+int send_message(int fd, struct md_message *msg, int tmo)
+{
+       enum tx_rx_state outcome = tx_rx_message(fd, msg, 0, tmo);
+
+       return outcome == TX_RX_SUCCESS ? 0 : -1;
+}
+
+/* ack - send a message with sequence number seq and no payload (num_bytes
+ * and buf are left zero).  Returns 0 on success, -1 on failure. */
+int ack(int fd, int seq, int tmo)
+{
+       return send_message(fd, &(struct md_message){ .seq = seq }, tmo);
+}
+
+/* nack - send a payload-free message whose seq field carries err (num_bytes
+ * and buf are left zero).  Returns 0 on success, -1 on failure. */
+int nack(int fd, int err, int tmo)
+{
+       return send_message(fd, &(struct md_message){ .seq = err }, tmo);
+}
+
+/* send_remove_device - send a remove-device command for rdev over fd.
+ * The command travels as the payload of a message whose sequence number is
+ * seq.  Returns the send_message() result: 0 on success, -1 on failure. */
+int send_remove_device(int fd, dev_t rdev, int seq, int tmo)
+{
+       struct md_remove_device_cmd request = {
+               .action = md_action_remove_device, .rdev = rdev };
+       struct md_message envelope = {
+               .seq = seq, .num_bytes = sizeof(request), .buf = &request };
+
+       return send_message(fd, &envelope, tmo);
+}
+
+/* Connect to /var/run/mdadm/<devname>.sock and set O_NONBLOCK on the socket.
+ * Returns the fd or -1; an over-long devname fails with ENAMETOOLONG. */
+int connect_monitor(char *devname)
+{
+       struct sockaddr_un addr = { .sun_family = PF_LOCAL };
+       int sfd, len;
+
+       len = snprintf(addr.sun_path, sizeof(addr.sun_path),
+                      "/var/run/mdadm/%s.sock", devname);
+       if (len < 0 || (size_t)len >= sizeof(addr.sun_path)) {
+               errno = ENAMETOOLONG;
+               return -1;
+       }
+
+       sfd = socket(PF_LOCAL, SOCK_STREAM, 0);
+       if (sfd < 0)
+               return -1;
+       if (connect(sfd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+               close(sfd);
+               return -1;
+       }
+
+       fcntl(sfd, F_SETFL, fcntl(sfd, F_GETFL, 0) | O_NONBLOCK);
+       return sfd;
+}
+
+/* ping_monitor - check that the monitor of devname answers on its socket.
+ * Sends an empty message and expects a reply whose seq is 0.
+ * Returns 0 on success, a negative value otherwise.
+ */
+int ping_monitor(char *devname)
+{
+       int sfd = connect_monitor(devname);
+       struct md_message msg = { .buf = NULL };
+       int err = 0;
+
+       if (sfd < 0)
+               return sfd;
+
+       /* only look at the reply when both the send and the receive worked;
+        * msg holds nothing meaningful otherwise */
+       if (ack(sfd, 0, 0) != 0 ||
+           receive_message(sfd, &msg, 0) != 0 || msg.seq != 0)
+               err = -1;
+
+       free(msg.buf); /* receive_message() allocates any payload it reads */
+       close(sfd);
+       return err;
+}
diff --git a/msg.h b/msg.h
new file mode 100644 (file)
index 0000000..84ee9b3
--- /dev/null
+++ b/msg.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2008 Intel Corporation
+ *
+ *     mdmon socket / message handling
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+struct mdinfo;
+struct md_message {
+       int seq;        /* sequence number; nack() puts an error code here */
+       int num_bytes;  /* size of buf in bytes, 0 when there is no payload */
+       void *buf;      /* payload, e.g. a struct md_remove_device_cmd */
+};
+
+enum md_message_action {
+       md_action_ping_monitor,         /* presumably a ping request; TODO confirm */
+       md_action_remove_device,        /* set by send_remove_device() */
+};
+
+struct md_generic_cmd {
+       enum md_message_action action; /* same leading member as the other commands */
+};
+
+struct md_remove_device_cmd {
+       enum md_message_action action; /* md_action_remove_device */
+       dev_t rdev; /* device number (st_rdev) of the disk to remove */
+};
+
+/* union of all known command types; tx_rx_message() rejects any message
+ * whose ->num_bytes exceeds its size, when sending as well as receiving
+ */
+union md_message_commands {
+       struct md_generic_cmd generic;
+       struct md_remove_device_cmd remove;
+};
+
+extern const int start_magic;  /* defined in msg.c */
+extern const int end_magic;    /* defined in msg.c */
+
+extern int receive_message(int fd, struct md_message *msg, int tmo); /* 0 or -1 */
+extern int send_message(int fd, struct md_message *msg, int tmo); /* 0 or -1 */
+extern int ack(int fd, int seq, int tmo); /* empty message carrying seq */
+extern int nack(int fd, int err, int tmo); /* empty message, seq = err */
+extern int connect_monitor(char *devname); /* socket fd, or -1 */
+extern int ping_monitor(char *devname); /* 0 if the monitor replies with seq 0 */
+extern int send_remove_device(int fd, dev_t rdev, int seq, int tmo); /* 0 or -1 */
+
diff --git a/util.c b/util.c
index 4acb367..c627382 100644 (file)
--- a/util.c
+++ b/util.c
@@ -783,6 +783,9 @@ int dev_open(char *dev, int flags)
 struct superswitch *superlist[] = { &super0, &super1, &super_ddf, &super_imsm, NULL };
 
 #if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
+
+struct supertype supertype_container_member;
+
 struct supertype *super_by_fd(int fd)
 {
        mdu_array_info_t array;
@@ -812,8 +815,11 @@ struct supertype *super_by_fd(int fd)
                sprintf(version, "%d.%d", vers, minor);
                verstr = version;
        }
-       for (i = 0; st == NULL && superlist[i] ; i++)
-               st = superlist[i]->match_metadata_desc(verstr);
+       if (minor == -2 && verstr[0] == '/')
+               st = &supertype_container_member;
+       else
+               for (i = 0; st == NULL && superlist[i] ; i++)
+                       st = superlist[i]->match_metadata_desc(verstr);
 
        if (sra)
                sysfs_free(sra);