git.ipfire.org Git - thirdparty/mdadm.git/commitdiff
Manage: be more careful about --add attempts.
author NeilBrown <neilb@suse.de>
Mon, 22 Nov 2010 08:35:25 +0000 (19:35 +1100)
committer NeilBrown <neilb@suse.de>
Mon, 22 Nov 2010 08:35:25 +0000 (19:35 +1100)
If an --add is requested and a re-add looks promising but fails or
cannot possibly succeed, then don't try the add.  This avoids
inadvertently turning devices into spares when an array is failed but
the devices seem to actually work.

Signed-off-by: NeilBrown <neilb@suse.de>
Manage.c
mdadm.h
tests/01r1fail
util.c

diff --git a/Manage.c b/Manage.c
index 6e9d4a03b45e76c7e3cfb71c816ade60a6a06b0d..acfec750f714b28c9570ee408380c58cb0ac8611 100644 (file)
--- a/Manage.c
+++ b/Manage.c
@@ -383,6 +383,7 @@ int Manage_subdevs(char *devname, int fd,
                char *dnprintable = dv->devname;
                char *add_dev = dv->devname;
                int err;
+               int re_add_failed = 0;
 
                next = dv->next;
                jnext = 0;
@@ -662,14 +663,20 @@ int Manage_subdevs(char *devname, int fd,
                                    get_linux_version() <= 2006018)
                                        ;
                                else if (st->sb) {
+                                       struct mdinfo mdi;
+                                       st->ss->getinfo_super(st, &mdi);
                                        st->ss->uuid_from_super(st, ouuid);
-                                       if (memcmp(duuid, ouuid, sizeof(ouuid))==0) {
-                                               /* looks close enough for now.  Kernel
-                                                * will worry about whether a bitmap
-                                                * based reconstruction is possible.
+                                       if ((mdi.disk.state & (1<<MD_DISK_ACTIVE)) &&
+                                           !(mdi.disk.state & (1<<MD_DISK_FAULTY)) &&
+                                           memcmp(duuid, ouuid, sizeof(ouuid))==0) {
+                                               /* look like it is worth a try.  Need to
+                                                * make sure kernel will accept it though.
                                                 */
-                                               struct mdinfo mdi;
-                                               st->ss->getinfo_super(st, &mdi);
+                                               disc.number = mdi.disk.number;
+                                               if (ioctl(fd, GET_DISK_INFO, &disc) != 0
+                                                   || disc.major != 0 || disc.minor != 0
+                                                   || !enough_fd(fd))
+                                                       goto skip_re_add;
                                                disc.major = major(stb.st_rdev);
                                                disc.minor = minor(stb.st_rdev);
                                                disc.number = mdi.disk.number;
@@ -684,8 +691,7 @@ int Manage_subdevs(char *devname, int fd,
                                                tfd = -1;
                                                /* don't even try if disk is marked as faulty */
                                                errno = 0;
-                                               if ((disc.state & 1) == 0 &&
-                                                   ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
+                                               if (ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
                                                        if (verbose >= 0)
                                                                fprintf(stderr, Name ": re-added %s\n", add_dev);
                                                        count++;
@@ -698,7 +704,8 @@ int Manage_subdevs(char *devname, int fd,
                                                                continue;
                                                        return 1;
                                                }
-                                               /* fall back on normal-add */
+                                       skip_re_add:
+                                               re_add_failed = 1;
                                        }
                                }
                                if (add_dev != dv->devname) {
@@ -720,6 +727,17 @@ int Manage_subdevs(char *devname, int fd,
                                                dv->devname, devname);
                                        return 1;
                                }
+                               if (re_add_failed) {
+                                       fprintf(stderr, Name ": %s reports being an active member for %s, but a --re-add fails.\n",
+                                               dv->devname, devname);
+                                       fprintf(stderr, Name ": not performing --add as that would convert %s in to a spare.\n",
+                                               dv->devname);
+                                       fprintf(stderr, Name ": To make this a spare, use \"mdadm --zero-superblock %s\" first.\n",     
+                                               dv->devname);
+                                       if (tfd >= 0)
+                                               close(tfd);
+                                       return 1;
+                               }
                        } else {
                                /* non-persistent. Must ensure that new drive
                                 * is at least array.size big.
diff --git a/mdadm.h b/mdadm.h
index 9ad99f046b2103722077ab149b68f970981cf6b8..3e229103d9a7166de7592cfbd967b8045506e591 100644 (file)
--- a/mdadm.h
+++ b/mdadm.h
@@ -986,6 +986,7 @@ extern char *fname_from_uuid(struct supertype *st,
 extern unsigned long calc_csum(void *super, int bytes);
 extern int enough(int level, int raid_disks, int layout, int clean,
                   char *avail, int avail_disks);
+extern int enough_fd(int fd);
 extern int ask(char *mesg);
 extern unsigned long long get_component_size(int fd);
 extern void remove_partitions(int fd);
diff --git a/tests/01r1fail b/tests/01r1fail
index c3786630935741ed66a7eae5449ec2a64423577b..9f5563210a9f6768bff6c232ea5fc62cfe46077d 100644 (file)
--- a/tests/01r1fail
+++ b/tests/01r1fail
@@ -20,6 +20,7 @@ mdadm $md0 --remove $dev2 $dev1
 check nosync
 check state UUU_
 
+mdadm --zero-superblock $dev2
 mdadm $md0 -a $dev2 
 check recovery
 check wait
diff --git a/util.c b/util.c
index 0cb251c31d923d85a6a8fb198b854d9cdc2b6f49..c9976218962ab0ddeda0d4ff7ad40ab8c45b8b74 100644 (file)
--- a/util.c
+++ b/util.c
@@ -320,6 +320,36 @@ int enough(int level, int raid_disks, int layout, int clean,
        }
 }
 
+/*
+ * Report whether the array open on 'fd' currently has enough in-sync
+ * devices, as judged by enough().
+ *
+ * The array geometry is read with GET_ARRAY_INFO, then every slot from
+ * 0 to raid_disks + nr_disks - 1 is queried with GET_DISK_INFO.  A
+ * device is counted only if it has MD_DISK_SYNC set and its raid_disk
+ * lies within [0, raid_disks).
+ *
+ * Returns the result of enough() (called with 'clean' set to 1), or 0
+ * if the array info cannot be read, raid_disks is not positive, or the
+ * availability map cannot be allocated.
+ */
+int enough_fd(int fd)
+{
+       struct mdu_array_info_s array;
+       struct mdu_disk_info_s disk;
+       int avail_disks = 0;
+       int i;
+       int rv;
+       char *avail;
+
+       if (ioctl(fd, GET_ARRAY_INFO, &array) != 0 ||
+           array.raid_disks <= 0)
+               return 0;
+       avail = calloc(array.raid_disks, 1);
+       if (avail == NULL)
+               /* Without the map we cannot claim there are enough devices. */
+               return 0;
+       for (i=0; i<array.raid_disks + array.nr_disks; i++) {
+               disk.number = i;
+               /* Slots the kernel will not describe are simply skipped. */
+               if (ioctl(fd, GET_DISK_INFO, &disk) != 0)
+                       continue;
+               if (! (disk.state & (1<<MD_DISK_SYNC)))
+                       continue;
+               /* Guard the index used on 'avail' below. */
+               if (disk.raid_disk < 0 || disk.raid_disk >= array.raid_disks)
+                       continue;
+               avail_disks++;
+               avail[disk.raid_disk] = 1;
+       }
+       /* This is used on an active array, so assume it is clean */
+       rv = enough(array.level, array.raid_disks, array.layout,
+                   1,
+                   avail, avail_disks);
+       /* NOTE(review): assumes enough() does not keep 'avail' - confirm. */
+       free(avail);
+       return rv;
+}
+
+
 const int uuid_match_any[4] = { ~0, ~0, ~0, ~0 };
 int same_uuid(int a[4], int b[4], int swapuuid)
 {