]> git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - mdadm.c
mdadm: improve the dlm locking mechanism for clustered raid
[thirdparty/mdadm.git] / mdadm.c
diff --git a/mdadm.c b/mdadm.c
index d6ad8dc28967e2886a2f4dee4146d1859010ace2..5afe4155d7011f2f3b653ff162dedeb940cdd911 100644 (file)
--- a/mdadm.c
+++ b/mdadm.c
@@ -57,6 +57,7 @@ int main(int argc, char *argv[])
        struct mddev_dev *devlist = NULL;
        struct mddev_dev **devlistend = & devlist;
        struct mddev_dev *dv;
+       mdu_array_info_t array;
        int devs_found = 0;
        char *symlinks = NULL;
        int grow_continue = 0;
@@ -78,6 +79,7 @@ int main(int argc, char *argv[])
                .level          = UnSet,
                .layout         = UnSet,
                .bitmap_chunk   = UnSet,
+               .consistency_policy     = CONSISTENCY_POLICY_UNKNOWN,
        };
 
        char sys_hostname[256];
@@ -102,14 +104,15 @@ int main(int argc, char *argv[])
        FILE *outf;
 
        int mdfd = -1;
+       int locked = 0;
 
        srandom(time(0) ^ getpid());
 
-       ident.uuid_set=0;
+       ident.uuid_set = 0;
        ident.level = UnSet;
        ident.raid_disks = UnSet;
-       ident.super_minor= UnSet;
-       ident.devices=0;
+       ident.super_minor = UnSet;
+       ident.devices = 0;
        ident.spare_group = NULL;
        ident.autof = 0;
        ident.st = NULL;
@@ -119,10 +122,14 @@ int main(int argc, char *argv[])
        ident.container = NULL;
        ident.member = NULL;
 
-       while ((option_index = -1) ,
-              (opt=getopt_long(argc, argv,
-                               shortopt, long_options,
-                               &option_index)) != -1) {
+       if (get_linux_version() < 2006015) {
+               pr_err("This version of mdadm does not support kernels older than 2.6.15\n");
+               exit(1);
+       }
+
+       while ((option_index = -1),
+              (opt = getopt_long(argc, argv, shortopt, long_options,
+                                 &option_index)) != -1) {
                int newmode = mode;
                /* firstly, some mode-independent options */
                switch(opt) {
@@ -396,7 +403,7 @@ int main(int argc, char *argv[])
                                pr_err("metadata information already given\n");
                                exit(2);
                        }
-                       for(i=0; !ss && superlist[i]; i++)
+                       for(i = 0; !ss && superlist[i]; i++)
                                ss = superlist[i]->match_metadata_desc(optarg);
 
                        if (!ss) {
@@ -541,7 +548,7 @@ int main(int argc, char *argv[])
 
                        case 5:
                                s.layout = map_name(r5layout, optarg);
-                               if (s.layout==UnSet) {
+                               if (s.layout == UnSet) {
                                        pr_err("layout %s not understood for raid5.\n",
                                                optarg);
                                        exit(2);
@@ -549,7 +556,7 @@ int main(int argc, char *argv[])
                                break;
                        case 6:
                                s.layout = map_name(r6layout, optarg);
-                               if (s.layout==UnSet) {
+                               if (s.layout == UnSet) {
                                        pr_err("layout %s not understood for raid6.\n",
                                                optarg);
                                        exit(2);
@@ -614,7 +621,7 @@ int main(int argc, char *argv[])
                        c.homecluster = optarg;
                        if (strlen(c.homecluster) > 64) {
                                pr_err("Cluster name too big.\n");
-                               exit(ERANGE);
+                               exit(2);
                        }
                        continue;
                case O(CREATE,'x'): /* number of spare (eXtra) disks */
@@ -664,7 +671,7 @@ int main(int argc, char *argv[])
                case O(MISC,'f'): /* force zero */
                case O(MISC,Force): /* force zero */
                case O(MANAGE,Force): /* add device which is too large */
-                       c.force=1;
+                       c.force = 1;
                        continue;
                        /* now for the Assemble options */
                case O(ASSEMBLE, FreezeReshape):   /* Freeze reshape during
@@ -768,16 +775,20 @@ int main(int argc, char *argv[])
                                continue;
                        if (strcmp(c.update, "force-no-bbl") == 0)
                                continue;
+                       if (strcmp(c.update, "ppl") == 0)
+                               continue;
+                       if (strcmp(c.update, "no-ppl") == 0)
+                               continue;
                        if (strcmp(c.update, "metadata") == 0)
                                continue;
                        if (strcmp(c.update, "revert-reshape") == 0)
                                continue;
-                       if (strcmp(c.update, "byteorder")==0) {
+                       if (strcmp(c.update, "byteorder") == 0) {
                                if (ss) {
                                        pr_err("must not set metadata type with --update=byteorder.\n");
                                        exit(2);
                                }
-                               for(i=0; !ss && superlist[i]; i++)
+                               for(i = 0; !ss && superlist[i]; i++)
                                        ss = superlist[i]->match_metadata_desc(
                                                "0.swap");
                                if (!ss) {
@@ -801,7 +812,7 @@ int main(int argc, char *argv[])
                "     'sparc2.2', 'super-minor', 'uuid', 'name', 'nodes', 'resync',\n"
                "     'summaries', 'homehost', 'home-cluster', 'byteorder', 'devicesize',\n"
                "     'no-bitmap', 'metadata', 'revert-reshape'\n"
-               "     'bbl', 'no-bbl', 'force-no-bbl'\n"
+               "     'bbl', 'no-bbl', 'force-no-bbl', 'ppl', 'no-ppl'\n"
                                );
                        exit(outf == stdout ? 0 : 2);
 
@@ -1139,6 +1150,10 @@ int main(int argc, char *argv[])
                case O(CREATE,Bitmap): /* here we create the bitmap */
                case O(GROW,'b'):
                case O(GROW,Bitmap):
+                       if (s.bitmap_file) {
+                               pr_err("bitmap cannot be set twice. Second value: %s.\n", optarg);
+                               exit(2);
+                       }
                        if (strcmp(optarg, "internal") == 0 ||
                            strcmp(optarg, "none") == 0 ||
                            strchr(optarg, '/') != NULL) {
@@ -1211,6 +1226,16 @@ int main(int argc, char *argv[])
 
                        s.journaldisks = 1;
                        continue;
+               case O(CREATE, 'k'):
+               case O(GROW, 'k'):
+                       s.consistency_policy = map_name(consistency_policies,
+                                                       optarg);
+                       if (s.consistency_policy < CONSISTENCY_POLICY_RESYNC) {
+                               pr_err("Invalid consistency policy: %s\n",
+                                      optarg);
+                               exit(2);
+                       }
+                       continue;
                }
                /* We have now processed all the valid options. Anything else is
                 * an error
@@ -1238,9 +1263,48 @@ int main(int argc, char *argv[])
                exit(0);
        }
 
-       if (s.journaldisks && (s.level < 4 || s.level > 6)) {
-               pr_err("--write-journal is only supported for RAID level 4/5/6.\n");
-               exit(2);
+       if (s.journaldisks) {
+               if (s.level < 4 || s.level > 6) {
+                       pr_err("--write-journal is only supported for RAID level 4/5/6.\n");
+                       exit(2);
+               }
+               if (s.consistency_policy != CONSISTENCY_POLICY_UNKNOWN &&
+                   s.consistency_policy != CONSISTENCY_POLICY_JOURNAL) {
+                       pr_err("--write-journal is not supported with consistency policy: %s\n",
+                              map_num(consistency_policies, s.consistency_policy));
+                       exit(2);
+               }
+       }
+
+       if (mode == CREATE &&
+           s.consistency_policy != CONSISTENCY_POLICY_UNKNOWN) {
+               if (s.level <= 0) {
+                       pr_err("--consistency-policy not meaningful with level %s.\n",
+                              map_num(pers, s.level));
+                       exit(2);
+               } else if (s.consistency_policy == CONSISTENCY_POLICY_JOURNAL &&
+                          !s.journaldisks) {
+                       pr_err("--write-journal is required for consistency policy: %s\n",
+                              map_num(consistency_policies, s.consistency_policy));
+                       exit(2);
+               } else if (s.consistency_policy == CONSISTENCY_POLICY_PPL &&
+                          s.level != 5) {
+                       pr_err("PPL consistency policy is only supported for RAID level 5.\n");
+                       exit(2);
+               } else if (s.consistency_policy == CONSISTENCY_POLICY_BITMAP &&
+                          (!s.bitmap_file ||
+                           strcmp(s.bitmap_file, "none") == 0)) {
+                       pr_err("--bitmap is required for consistency policy: %s\n",
+                              map_num(consistency_policies, s.consistency_policy));
+                       exit(2);
+               } else if (s.bitmap_file &&
+                          strcmp(s.bitmap_file, "none") != 0 &&
+                          s.consistency_policy != CONSISTENCY_POLICY_BITMAP &&
+                          s.consistency_policy != CONSISTENCY_POLICY_JOURNAL) {
+                       pr_err("--bitmap is not compatible with consistency policy: %s\n",
+                              map_num(consistency_policies, s.consistency_policy));
+                       exit(2);
+               }
        }
 
        if (!mode && devs_found) {
@@ -1372,6 +1436,22 @@ int main(int argc, char *argv[])
                /* --scan implied --brief unless -vv */
                c.brief = 1;
 
+       if (mode == CREATE) {
+               if (s.bitmap_file && strcmp(s.bitmap_file, "clustered") == 0) {
+                       locked = cluster_get_dlmlock();
+                       if (locked != 1)
+                               exit(1);
+               }
+       } else if (mode == MANAGE || mode == GROW || mode == INCREMENTAL) {
+               if (!md_get_array_info(mdfd, &array) && (devmode != 'c')) {
+                       if (array.state & (1 << MD_SB_CLUSTERED)) {
+                               locked = cluster_get_dlmlock();
+                               if (locked != 1)
+                                       exit(1);
+                       }
+               }
+       }
+
        switch(mode) {
        case MANAGE:
                /* readonly, add/remove, readwrite, runstop */
@@ -1418,7 +1498,7 @@ int main(int argc, char *argv[])
                                pr_err("can only assemble a single array when providing a backup file.\n");
                                exit(1);
                        }
-                       for (dv = devlist ; dv ; dv=dv->next) {
+                       for (dv = devlist; dv; dv = dv->next) {
                                struct mddev_ident *array_ident = conf_get_ident(dv->devname);
                                if (array_ident == NULL) {
                                        pr_err("%s not identified in config file.\n",
@@ -1480,8 +1560,13 @@ int main(int argc, char *argv[])
                                break;
                        }
 
-                       if (s.level != 1) {
-                               pr_err("--bitmap=clustered is currently supported with RAID mirror only\n");
+                       if (s.level != 1 && s.level != 10) {
+                               pr_err("--bitmap=clustered is currently supported with raid1/10 only\n");
+                               rv = 1;
+                               break;
+                       }
+                       if (s.level == 10 && !(is_near_layout_10(s.layout) || s.layout == UnSet)) {
+                               pr_err("only near layout is supported with clustered raid10\n");
                                rv = 1;
                                break;
                        }
@@ -1553,10 +1638,10 @@ int main(int argc, char *argv[])
                        else
                                c.delay = 60;
                }
-               rv= Monitor(devlist, mailaddr, program,
-                           &c, daemonise, oneshot,
-                           dosyslog, pidfile, increments,
-                           spare_sharing);
+               rv = Monitor(devlist, mailaddr, program,
+                            &c, daemonise, oneshot,
+                            dosyslog, pidfile, increments,
+                            spare_sharing);
                break;
 
        case GROW:
@@ -1574,7 +1659,10 @@ int main(int argc, char *argv[])
                                rv = 1;
                                break;
                        }
-                       sysfs_init(&sra, mdfd, NULL);
+                       if (sysfs_init(&sra, mdfd, NULL)) {
+                               rv = 1;
+                               break;
+                       }
                        if (array_size == MAX_SIZE)
                                err = sysfs_set_str(&sra, NULL, "array_size", "default");
                        else
@@ -1596,7 +1684,7 @@ int main(int argc, char *argv[])
                                rv = 1;
                                break;
                        }
-                       for (dv=devlist->next; dv ; dv=dv->next) {
+                       for (dv = devlist->next; dv; dv = dv->next) {
                                rv = Grow_Add_device(devlist->devname, mdfd,
                                                     dv->devname);
                                if (rv)
@@ -1622,6 +1710,8 @@ int main(int argc, char *argv[])
                        rv = Grow_reshape(devlist->devname, mdfd,
                                          devlist->next,
                                          data_offset, &c, &s);
+               } else if (s.consistency_policy != CONSISTENCY_POLICY_UNKNOWN) {
+                       rv = Grow_consistency_policy(devlist->devname, mdfd, &c, &s);
                } else if (array_size == 0)
                        pr_err("no changes to --grow\n");
                break;
@@ -1667,6 +1757,10 @@ int main(int argc, char *argv[])
                autodetect();
                break;
        }
+       if (locked)
+               cluster_release_dlmlock();
+       if (mdfd > 0)
+               close(mdfd);
        exit(rv);
 }
 
@@ -1689,7 +1783,7 @@ static int scan_assemble(struct supertype *ss,
                pr_err("No devices listed in conf file were found.\n");
                return 1;
        }
-       for (a = array_list; a ; a = a->next) {
+       for (a = array_list; a; a = a->next) {
                a->assembled = 0;
                if (a->autof == 0)
                        a->autof = c->autof;
@@ -1700,7 +1794,7 @@ static int scan_assemble(struct supertype *ss,
                failures = 0;
                successes = 0;
                rv = 0;
-               for (a = array_list; a ; a = a->next) {
+               for (a = array_list; a; a = a->next) {
                        int r;
                        if (a->assembled)
                                continue;
@@ -1766,7 +1860,7 @@ static int misc_scan(char devmode, struct context *c)
        int rv = 0;
 
        for (members = 0; members <= 1; members++) {
-               for (e=ms ; e ; e=e->next) {
+               for (e = ms; e; e = e->next) {
                        char *name = NULL;
                        struct map_ent *me;
                        struct stat stb;
@@ -1776,8 +1870,7 @@ static int misc_scan(char devmode, struct context *c)
                        if (members != member)
                                continue;
                        me = map_by_devnm(&map, e->devnm);
-                       if (me && me->path
-                           && strcmp(me->path, "/unknown") != 0)
+                       if (me && me->path && strcmp(me->path, "/unknown") != 0)
                                name = me->path;
                        if (name == NULL || stat(name, &stb) != 0)
                                name = get_md_name(e->devnm);
@@ -1790,7 +1883,7 @@ static int misc_scan(char devmode, struct context *c)
                        if (devmode == 'D')
                                rv |= Detail(name, c);
                        else
-                               rv |= WaitClean(name, -1, c->verbose);
+                               rv |= WaitClean(name, c->verbose);
                        put_md_name(name);
                }
        }
@@ -1804,7 +1897,7 @@ static int stop_scan(int verbose)
        /* Due to possible stacking of devices, repeat until
         * nothing more can be stopped
         */
-       int progress=1, err;
+       int progress = 1, err;
        int last = 0;
        int rv = 0;
        do {
@@ -1813,7 +1906,7 @@ static int stop_scan(int verbose)
 
                if (!progress) last = 1;
                progress = 0; err = 0;
-               for (e=ms ; e ; e=e->next) {
+               for (e = ms; e; e = e->next) {
                        char *name = get_md_name(e->devnm);
                        int mdfd;
 
@@ -1848,8 +1941,8 @@ static int misc_list(struct mddev_dev *devlist,
        struct mddev_dev *dv;
        int rv = 0;
 
-       for (dv=devlist ; dv; dv=(rv & 16) ? NULL : dv->next) {
-               int mdfd;
+       for (dv = devlist; dv; dv = (rv & 16) ? NULL : dv->next) {
+               int mdfd = -1;
 
                switch(dv->disposition) {
                case 'D':
@@ -1868,16 +1961,21 @@ static int misc_list(struct mddev_dev *devlist,
                        }
                        continue;
                case 'Q':
-                       rv |= Query(dv->devname); continue;
+                       rv |= Query(dv->devname);
+                       continue;
                case 'X':
-                       rv |= ExamineBitmap(dv->devname, c->brief, ss); continue;
+                       rv |= ExamineBitmap(dv->devname, c->brief, ss);
+                       continue;
                case ExamineBB:
-                       rv |= ExamineBadblocks(dv->devname, c->brief, ss); continue;
+                       rv |= ExamineBadblocks(dv->devname, c->brief, ss);
+                       continue;
                case 'W':
                case WaitOpt:
-                       rv |= Wait(dv->devname); continue;
+                       rv |= Wait(dv->devname);
+                       continue;
                case Waitclean:
-                       rv |= WaitClean(dv->devname, -1, c->verbose); continue;
+                       rv |= WaitClean(dv->devname, c->verbose);
+                       continue;
                case KillSubarray:
                        rv |= Kill_subarray(dv->devname, c->subarray, c->verbose);
                        continue;
@@ -1901,24 +1999,27 @@ static int misc_list(struct mddev_dev *devlist,
                        rv |= SetAction(dv->devname, c->action);
                        continue;
                }
-               switch(dv->devname[0] == '/') {
-                       case 0:
-                               mdfd = open_dev(dv->devname);
-                               if (mdfd >= 0) break;
-                       case 1:
-                               mdfd = open_mddev(dv->devname, 1);  
-               }
-               if (mdfd>=0) {
+
+               if (dv->devname[0] != '/')
+                       mdfd = open_dev(dv->devname);
+               if (dv->devname[0] == '/' || mdfd < 0)
+                       mdfd = open_mddev(dv->devname, 1);
+
+               if (mdfd >= 0) {
                        switch(dv->disposition) {
                        case 'R':
                                c->runstop = 1;
-                               rv |= Manage_run(dv->devname, mdfd, c); break;
+                               rv |= Manage_run(dv->devname, mdfd, c);
+                               break;
                        case 'S':
-                               rv |= Manage_stop(dv->devname, mdfd, c->verbose, 0); break;
+                               rv |= Manage_stop(dv->devname, mdfd, c->verbose, 0);
+                               break;
                        case 'o':
-                               rv |= Manage_ro(dv->devname, mdfd, 1); break;
+                               rv |= Manage_ro(dv->devname, mdfd, 1);
+                               break;
                        case 'w':
-                               rv |= Manage_ro(dv->devname, mdfd, -1); break;
+                               rv |= Manage_ro(dv->devname, mdfd, -1);
+                               break;
                        }
                        close(mdfd);
                } else
@@ -1931,13 +2032,15 @@ int SetAction(char *dev, char *action)
 {
        int fd = open(dev, O_RDONLY);
        struct mdinfo mdi;
+       int retval;
+
        if (fd < 0) {
                pr_err("Couldn't open %s: %s\n", dev, strerror(errno));
                return 1;
        }
-       sysfs_init(&mdi, fd, NULL);
+       retval = sysfs_init(&mdi, fd, NULL);
        close(fd);
-       if (!mdi.sys_name[0]) {
+       if (retval) {
                pr_err("%s is no an md array\n", dev);
                return 1;
        }