From: Guoqing Jiang Date: Wed, 10 Jun 2015 05:42:08 +0000 (+0800) Subject: Add a new clustered disk X-Git-Tag: mdadm-3.4~67^2~5 X-Git-Url: http://git.ipfire.org/?p=thirdparty%2Fmdadm.git;a=commitdiff_plain;h=4de90913020923b69515630b8f19094d2e0d1d5a Add a new clustered disk A clustered disk is added by the traditional --add sequence. However, other nodes need to acknowledge that they can "see" the device. This is done by --cluster-confirm: --cluster-confirm SLOTNUM:/dev/whatever (if disk is found) or --cluster-confirm SLOTNUM:missing (if disk is not found) The node initiating the --add, has the disk state tagged with MD_DISK_CLUSTER_ADD and the one confirming tag the disk with MD_DISK_CANDIDATE. Signed-off-by: Goldwyn Rodrigues Signed-off-by: Guoqing Jiang Signed-off-by: NeilBrown --- diff --git a/Manage.c b/Manage.c index 2e602d74..e3bdfb3d 100644 --- a/Manage.c +++ b/Manage.c @@ -690,7 +690,8 @@ skip_re_add: int Manage_add(int fd, int tfd, struct mddev_dev *dv, struct supertype *tst, mdu_array_info_t *array, int force, int verbose, char *devname, - char *update, unsigned long rdev, unsigned long long array_size) + char *update, unsigned long rdev, unsigned long long array_size, + int raid_slot) { unsigned long long ldsize; struct supertype *dev_st = NULL; @@ -880,7 +881,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv, } disc.major = major(rdev); disc.minor = minor(rdev); - disc.number =j; + if (raid_slot < 0) + disc.number = j; + else + disc.number = raid_slot; disc.state = 0; if (array->not_persistent==0) { int dfd; @@ -921,6 +925,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv, } free(used); } + + if (array->state & (1 << MD_SB_CLUSTERED)) { + if (dv->disposition == 'c') + disc.state |= (1 << MD_DISK_CANDIDATE); + else + disc.state |= (1 << MD_DISK_CLUSTER_ADD); + } + if (dv->writemostly == 1) disc.state |= (1 << MD_DISK_WRITEMOSTLY); if (tst->ss->external) { @@ -1240,6 +1252,7 @@ int Manage_subdevs(char *devname, int fd, * variant on 'A' * 'F' - Another variant of 'A', where the device was faulty * so must be removed from the array first. + * 'c' - confirm the device as found (for clustered environments) * * For 'f' and 'r', the device can also be a kernel-internal * name such as 'sdb'. @@ -1255,6 +1268,7 @@ int Manage_subdevs(char *devname, int fd, struct mdinfo info; int frozen = 0; int busy = 0; + int raid_slot = -1; if (ioctl(fd, GET_ARRAY_INFO, &array)) { pr_err("Cannot get array info for %s\n", @@ -1283,6 +1297,17 @@ int Manage_subdevs(char *devname, int fd, int rv; int mj,mn; + raid_slot = -1; + if (dv->disposition == 'c') { + rv = parse_cluster_confirm_arg(dv->devname, + &dv->devname, + &raid_slot); + if (!rv) { + pr_err("Could not get the devname of cluster\n"); + goto abort; + } + } + if (strcmp(dv->devname, "failed") == 0 || strcmp(dv->devname, "faulty") == 0) { if (dv->disposition != 'A' @@ -1308,6 +1333,11 @@ int Manage_subdevs(char *devname, int fd, if (strcmp(dv->devname, "missing") == 0) { struct mddev_dev *add_devlist = NULL; struct mddev_dev **dp; + if (dv->disposition == 'c') { + rv = ioctl(fd, CLUSTERED_DISK_NACK, NULL); + break; + } + if (dv->disposition != 'A') { pr_err("'missing' only meaningful with --re-add\n"); goto abort; @@ -1438,6 +1468,7 @@ int Manage_subdevs(char *devname, int fd, case 'A': case 'M': /* --re-add missing */ case 'F': /* --re-add faulty */ + case 'c': /* --cluster-confirm */ /* add the device */ if (subarray) { pr_err("Cannot add disks to a \'member\' array, perform this operation on the parent container\n"); @@ -1471,7 +1502,7 @@ int Manage_subdevs(char *devname, int fd, } rv = Manage_add(fd, tfd, dv, tst, &array, force, verbose, devname, update, - rdev, array_size); + rdev, array_size, raid_slot); close(tfd); tfd = -1; if (rv < 0) diff --git a/ReadMe.c b/ReadMe.c index c6286aec..c854cd5f 100644 --- a/ReadMe.c +++ b/ReadMe.c @@ -169,6 +169,7 @@ struct option long_options[] = { {"wait", 0, 0, WaitOpt}, {"wait-clean", 0, 0, Waitclean }, {"action", 1, 0, Action }, + {"cluster-confirm", 0, 0, ClusterConfirm}, /* For Detail/Examine */ {"brief", 0, 0, Brief}, diff --git a/md_p.h b/md_p.h index c4846bab..9b6b5f80 100644 --- a/md_p.h +++ b/md_p.h @@ -78,6 +78,12 @@ #define MD_DISK_ACTIVE 1 /* disk is running but may not be in sync */ #define MD_DISK_SYNC 2 /* disk is in sync with the raid set */ #define MD_DISK_REMOVED 3 /* disk is in sync with the raid set */ +#define MD_DISK_CLUSTER_ADD 4 /* Initiate a disk add across the cluster + * For clustered enviroments only. + */ +#define MD_DISK_CANDIDATE 5 /* disk is added as spare (local) until confirmed + * For clustered enviroments only. + */ #define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" is RAID1 config. * read requests will only be sent here in @@ -106,6 +112,7 @@ typedef struct mdp_device_descriptor_s { #define MD_SB_BLOCK_CONTAINER_RESHAPE 3 /* block container wide reshapes */ #define MD_SB_BLOCK_VOLUME 4 /* block activation of array, other arrays * in container can be activated */ +#define MD_SB_CLUSTERED 5 /* MD is clustered */ #define MD_SB_BITMAP_PRESENT 8 /* bitmap may be present nearby */ typedef struct mdp_superblock_s { diff --git a/md_u.h b/md_u.h index be9868a7..76068d64 100644 --- a/md_u.h +++ b/md_u.h @@ -44,6 +44,7 @@ #define STOP_ARRAY _IO (MD_MAJOR, 0x32) #define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33) #define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34) +#define CLUSTERED_DISK_NACK _IO (MD_MAJOR, 0x35) typedef struct mdu_version_s { int major; diff --git a/mdadm.8.in b/mdadm.8.in index a07ddb7c..3dd000ce 100644 --- a/mdadm.8.in +++ b/mdadm.8.in @@ -1406,6 +1406,15 @@ will avoid reading from these devices if possible. .BR \-\-readwrite Subsequent devices that are added or re\-added will have the 'write-mostly' flag cleared. +.TP +.BR \-\-cluster\-confirm +Confirm the existence of the device. This is issued in response to an \-\-add +request by a node in a cluster. When a node adds a device it sends a message +to all nodes in the cluster to look for a device with a UUID. This translates +to a udev notification with the UUID of the device to be added and the slot +number. The receiving node must acknowledge this message +with \-\-cluster\-confirm. Valid arguments are : in case +the device is found or :missing in case the device is not found. .P Each of these options requires that the first device listed is the array diff --git a/mdadm.c b/mdadm.c index 1a32328e..859c48de 100644 --- a/mdadm.c +++ b/mdadm.c @@ -196,6 +196,7 @@ int main(int argc, char *argv[]) case 'f': case Fail: case ReAdd: /* re-add */ + case ClusterConfirm: if (!mode) { newmode = MANAGE; shortopt = short_bitmap_options; @@ -933,6 +934,9 @@ int main(int argc, char *argv[]) * remove the device */ devmode = 'f'; continue; + case O(MANAGE, ClusterConfirm): + devmode = 'c'; + continue; case O(MANAGE,Replace): /* Mark these devices for replacement */ devmode = 'R'; diff --git a/mdadm.h b/mdadm.h index f56d9d68..00c726e9 100644 --- a/mdadm.h +++ b/mdadm.h @@ -346,6 +346,7 @@ enum special_options { Action, Nodes, ClusterName, + ClusterConfirm, }; enum prefix_standard { @@ -1281,6 +1282,7 @@ extern int parse_uuid(char *str, int uuid[4]); extern int parse_layout_10(char *layout); extern int parse_layout_faulty(char *layout); extern long parse_num(char *num); +extern int parse_cluster_confirm_arg(char *inp, char **devname, int *slot); extern int check_ext2(int fd, char *name); extern int check_reiser(int fd, char *name); extern int check_raid(int fd, char *name); diff --git a/util.c b/util.c index 9ec4aefd..ea6e6889 100644 --- a/util.c +++ b/util.c @@ -280,6 +280,16 @@ long parse_num(char *num) } #endif +int parse_cluster_confirm_arg(char *input, char **devname, int *slot) +{ + char *dev; + *slot = strtoul(input, &dev, 10); + if (dev == input || dev[0] != ':') + return -1; + *devname = dev+1; + return 0; +} + void remove_partitions(int fd) { /* remove partitions from this block devices.