]> git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - Manage.c
Support external metadata recovery-resume
[thirdparty/mdadm.git] / Manage.c
index 56bc2c3188af1dec8ebcbf6bb866d5584be201f5..df6079bf16afbe5fee96c82bbce5a32d564ec82d 100644 (file)
--- a/Manage.c
+++ b/Manage.c
@@ -1,7 +1,7 @@
 /*
  * mdadm - manage Linux "md" devices aka RAID arrays.
  *
- * Copyright (C) 2001-2006 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
  *
  *
  *    This program is free software; you can redistribute it and/or modify
  *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  *
  *    Author: Neil Brown
- *    Email: <neilb@cse.unsw.edu.au>
- *    Paper: Neil Brown
- *           School of Computer Science and Engineering
- *           The University of New South Wales
- *           Sydney, 2052
- *           Australia
+ *    Email: <neilb@suse.de>
  */
 
 #include "mdadm.h"
 #include "md_u.h"
 #include "md_p.h"
+#include <ctype.h>
 
 #define REGISTER_DEV           _IO (MD_MAJOR, 1)
 #define START_MD               _IO (MD_MAJOR, 2)
@@ -45,12 +41,15 @@ int Manage_ro(char *devname, int fd, int readonly)
         *
         */
        mdu_array_info_t array;
+#ifndef MDASSEMBLE
        struct mdinfo *mdi;
+#endif
 
        if (md_get_version(fd) < 9000) {
                fprintf(stderr, Name ": need md driver version 0.90.0 or later\n");
                return 1;
        }
+#ifndef MDASSEMBLE
        /* If this is an externally-manage array, we need to modify the
         * metadata_version so that mdmon doesn't undo our change.
         */
@@ -92,7 +91,7 @@ int Manage_ro(char *devname, int fd, int readonly)
                }
                return 0;
        }
-
+#endif
        if (ioctl(fd, GET_ARRAY_INFO, &array)) {
                fprintf(stderr, Name ": %s does not appear to be active.\n",
                        devname);
@@ -117,6 +116,55 @@ int Manage_ro(char *devname, int fd, int readonly)
 
 #ifndef MDASSEMBLE
 
+static void remove_devices(int devnum, char *path)
+{
+       /* Unlink the standard node for 'devnum' (/dev/mdN, or /dev/md_dN with
+        * N = -1-devnum when devnum is negative) and its partition nodes
+        * p1..p15.  If 'path' is given, also unlink 'path' and its partition
+        * variants, but only those that are symlinks to the matching node.
+        */
+       char base[40];
+       char link[1024];
+       char *path2 = NULL;
+       char *pe = NULL;
+       char *be;
+       ssize_t n;
+       int part;
+
+       if (devnum >= 0)
+               snprintf(base, sizeof(base), "/dev/md%d", devnum);
+       else
+               snprintf(base, sizeof(base), "/dev/md_d%d", -1-devnum);
+       be = base + strlen(base);
+       /* No links are removed for an empty 'path' or if malloc fails */
+       if (path && *path)
+               path2 = malloc(strlen(path)+20);
+       if (path2) {
+               strcpy(path2, path);
+               pe = path2 + strlen(path2);
+       }
+       for (part = 0; part < 16; part++) {
+               if (part) {
+                       sprintf(be, "p%d", part);
+                       /* 'p' only after a digit; cast keeps isdigit() defined */
+                       if (path2)
+                               sprintf(pe, isdigit((unsigned char)pe[-1])
+                                       ? "p%d" : "%d", part);
+               }
+               /* FIXME test if really is md device ?? */
+               unlink(base);
+               if (!path2)
+                       continue;
+               /* readlink() returns -1 on error and does not NUL-terminate */
+               n = readlink(path2, link, sizeof(link));
+               if (n > 0 && strlen(base) == (size_t)n &&
+                   strncmp(link, base, n) == 0)
+                       unlink(path2);
+       }
+       free(path2);
+}
+       
+
 int Manage_runstop(char *devname, int fd, int runstop, int quiet)
 {
        /* Run or stop the array. array must already be configured
@@ -160,15 +208,15 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet)
                struct map_ent *map = NULL;
                struct stat stb;
                struct mdinfo *mdi;
+               int devnum;
                /* If this is an mdmon managed array, just write 'inactive'
                 * to the array state and let mdmon clear up.
                 */
+               devnum = fd2devnum(fd);
                mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION);
                if (mdi &&
                    mdi->array.level > 0 &&
                    is_subarray(mdi->text_version)) {
-                       char *cp;
-
                        /* This is mdmon managed. */
                        close(fd);
                        if (sysfs_set_str(mdi, NULL,
@@ -181,10 +229,7 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet)
                        }
 
                        /* Give monitor a chance to act */
-                       cp = strchr(mdi->text_version+1, '/');
-                       if (*cp)
-                               *cp = 0;
-                       ping_monitor(mdi->text_version+1);
+                       ping_monitor(mdi->text_version);
 
                        fd = open(devname, O_RDONLY);
                } else if (mdi &&
@@ -197,8 +242,6 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet)
                         */
                        ping_monitor(mdi->sys_name);
                }
-               if (mdi)
-                       sysfs_free(mdi);
 
                if (fd >= 0 && ioctl(fd, STOP_ARRAY, NULL)) {
                        if (quiet == 0) {
@@ -210,17 +253,31 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet)
                                                "process, mounted filesystem "
                                                "or active volume group?\n");
                        }
+                       if (mdi)
+                               sysfs_free(mdi);
                        return 1;
                }
+               /* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array
+                * was stopped, so we'll do it here just to be sure.  Drop any
+                * partitions as well...
+                */
+               if (fd >= 0)
+                       ioctl(fd, BLKRRPART, 0);
+               if (mdi)
+                       sysfs_uevent(mdi, "change");
+
+               
+               if (devnum != NoMdDev &&
+                   (stat("/dev/.udev", &stb) != 0 ||
+                    check_env("MDADM_NO_UDEV"))) {
+                       struct map_ent *mp = map_by_devnum(&map, devnum);
+                       remove_devices(devnum, mp ? mp->path : NULL);
+               }
+
 
                if (quiet <= 0)
                        fprintf(stderr, Name ": stopped %s\n", devname);
-               if (fd >= 0 && fstat(fd, &stb) == 0) {
-                       int devnum;
-                       if (major(stb.st_rdev) == MD_MAJOR)
-                               devnum = minor(stb.st_rdev);
-                       else
-                               devnum = -1-(minor(stb.st_rdev)>>6);
+               if (devnum != NoMdDev) {
                        map_delete(&map, devnum);
                        map_write(map);
                        map_free(map);
@@ -249,24 +306,6 @@ int Manage_resize(char *devname, int fd, long long size, int raid_disks)
        return 0;
 }
 
-int Manage_reconfig(char *devname, int fd, int layout)
-{
-       mdu_array_info_t info;
-       if (ioctl(fd, GET_ARRAY_INFO, &info) != 0) {
-               fprintf(stderr, Name ": Cannot get array information for %s: %s\n",
-                       devname, strerror(errno));
-               return 1;
-       }
-       info.layout = layout;
-       printf("layout set to %d\n", info.layout);
-       if (ioctl(fd, SET_ARRAY_INFO, &info) != 0) {
-               fprintf(stderr, Name ": Cannot set layout for %s: %s\n",
-                       devname, strerror(errno));
-               return 1;
-       }
-       return 0;
-}
-
 int Manage_subdevs(char *devname, int fd,
                   mddev_dev_t devlist, int verbose)
 {
@@ -385,11 +424,15 @@ int Manage_subdevs(char *devname, int fd,
                } else {
                        j = 0;
 
-                       if (stat(dv->devname, &stb)) {
+                       tfd = dev_open(dv->devname, O_RDONLY);
+                       if (tfd < 0 || fstat(tfd, &stb) != 0) {
                                fprintf(stderr, Name ": cannot find %s: %s\n",
                                        dv->devname, strerror(errno));
+                               if (tfd >= 0)
+                                       close(tfd);
                                return 1;
                        }
+                       close(tfd);
                        if ((stb.st_mode & S_IFMT) != S_IFBLK) {
                                fprintf(stderr, Name ": %s is not a "
                                        "block device.\n",
@@ -411,7 +454,7 @@ int Manage_subdevs(char *devname, int fd,
                                return 1;
                        }
                        /* Make sure it isn't in use (in 2.6 or later) */
-                       tfd = open(dv->devname, O_RDONLY|O_EXCL|O_DIRECT);
+                       tfd = dev_open(dv->devname, O_RDONLY|O_EXCL|O_DIRECT);
                        if (tfd < 0) {
                                fprintf(stderr, Name ": Cannot open %s: %s\n",
                                        dv->devname, strerror(errno));
@@ -448,13 +491,6 @@ int Manage_subdevs(char *devname, int fd,
                        }
 
                        if (array.not_persistent == 0 || tst->ss->external) {
-                               /* Make sure device is large enough */
-                               if (tst->ss->avail_size(tst, ldsize/512) <
-                                   array_size) {
-                                       fprintf(stderr, Name ": %s not large enough to join array\n",
-                                               dv->devname);
-                                       return 1;
-                               }
 
                                /* need to find a sample superblock to copy, and
                                 * a spare slot to use.
@@ -490,6 +526,15 @@ int Manage_subdevs(char *devname, int fd,
                                        fprintf(stderr, Name ": cannot find valid superblock in this array - HELP\n");
                                        return 1;
                                }
+
+                               /* Make sure device is large enough */
+                               if (tst->ss->avail_size(tst, ldsize/512) <
+                                   array_size) {
+                                       fprintf(stderr, Name ": %s not large enough to join array\n",
+                                               dv->devname);
+                                       return 1;
+                               }
+
                                /* Possibly this device was recently part of the array
                                 * and was temporarily removed, and is now being re-added.
                                 * If so, we can simply re-add it.
@@ -516,16 +561,29 @@ int Manage_subdevs(char *devname, int fd,
                                                disc.number = mdi.disk.number;
                                                disc.raid_disk = mdi.disk.raid_disk;
                                                disc.state = mdi.disk.state;
-                                               if (dv->writemostly)
+                                               if (dv->writemostly == 1)
                                                        disc.state |= 1 << MD_DISK_WRITEMOSTLY;
+                                               if (dv->writemostly == 2)
+                                                       disc.state &= ~(1 << MD_DISK_WRITEMOSTLY);
                                                if (ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
                                                        if (verbose >= 0)
                                                                fprintf(stderr, Name ": re-added %s\n", dv->devname);
                                                        continue;
                                                }
+                                               if (errno == ENOMEM || errno == EROFS) {
+                                                       fprintf(stderr, Name ": add new device failed for %s: %s\n",
+                                                               dv->devname, strerror(errno));
+                                                       return 1;
+                                               }
                                                /* fall back on normal-add */
                                        }
                                }
+                               if (dv->re_add) {
+                                       fprintf(stderr, Name
+                                               ": --re-add for %s to %s is not possible\n",
+                                               dv->devname, devname);
+                                       return 1;
+                               }
                        } else {
                                /* non-persistent. Must ensure that new drive
                                 * is at least array.size big.
@@ -556,11 +614,14 @@ int Manage_subdevs(char *devname, int fd,
                        disc.state = 0;
                        if (array.not_persistent==0 || tst->ss->external) {
                                int dfd;
-                               if (dv->writemostly)
+                               if (dv->writemostly == 1)
                                        disc.state |= 1 << MD_DISK_WRITEMOSTLY;
-                               dfd = open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
-                               tst->ss->add_to_super(tst, &disc, dfd,
-                                                     dv->devname);
+                               dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
+                               if (tst->ss->add_to_super(tst, &disc, dfd,
+                                                         dv->devname)) {
+                                       close(dfd);
+                                       return 1;
+                               }
                                /* write_init_super will close 'dfd' */
                                if (tst->ss->external)
                                        /* mdmon will write the metadata */
@@ -595,8 +656,9 @@ int Manage_subdevs(char *devname, int fd,
                                                disc.state |= (1<<MD_DISK_SYNC);
                                                break;
                                        }
+                               free(used);
                        }
-                       if (dv->writemostly)
+                       if (dv->writemostly == 1)
                                disc.state |= (1 << MD_DISK_WRITEMOSTLY);
                        if (tst->ss->external) {
                                /* add a disk to an external metadata container
@@ -634,7 +696,8 @@ int Manage_subdevs(char *devname, int fd,
                                tst->ss->getinfo_super(tst, &new_mdi);
                                new_mdi.disk.major = disc.major;
                                new_mdi.disk.minor = disc.minor;
-                               if (sysfs_add_disk(sra, &new_mdi) != 0) {
+                               new_mdi.recovery_start = 0;
+                               if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
                                        fprintf(stderr, Name ": add new device to external metadata"
                                                " failed for %s\n", dv->devname);
                                        close(container_fd);
@@ -678,7 +741,14 @@ int Manage_subdevs(char *devname, int fd,
                                                " to container - odd\n");
                                        return 1;
                                }
-                               if (!sysfs_unique_holder(dnum, stb.st_rdev)) {
+                               /* in the detached case it is not possible to
+                                * check if we are the unique holder, so just
+                                * rely on the 'detached' checks
+                                */
+                               if (strcmp(dv->devname, "detached") == 0 ||
+                                   sysfs_unique_holder(dnum, stb.st_rdev))
+                                       /* pass */;
+                               else {
                                        fprintf(stderr, Name
                                                ": %s is %s, cannot remove.\n",
                                                dnprintable,
@@ -717,6 +787,23 @@ int Manage_subdevs(char *devname, int fd,
                                        close(lfd);
                                return 1;
                        }
+                       if (tst->ss->external) {
+                               /*
+                                * Before dropping our exclusive open we make an
+                                * attempt at preventing mdmon from seeing an
+                                * 'add' event before reconciling this 'remove'
+                                * event.
+                                */
+                               char *name = devnum2devname(fd2devnum(fd));
+
+                               if (!name) {
+                                       fprintf(stderr, Name ": unable to get container name\n");
+                                       return 1;
+                               }
+
+                               ping_manager(name);
+                               free(name);
+                       }
                        close(lfd);
                        if (verbose >= 0)
                                fprintf(stderr, Name ": hot removed %s\n",