A clustered disk is added by the traditional --add sequence.
However, other nodes need to acknowledge that they can "see"
the device. This is done by --cluster-confirm:
--cluster-confirm SLOTNUM:/dev/whatever (if disk is found)
or
--cluster-confirm SLOTNUM:missing (if disk is not found)
The node initiating the --add has the disk state tagged with
MD_DISK_CLUSTER_ADD, and each confirming node tags the disk with
MD_DISK_CANDIDATE.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: NeilBrown <neilb@suse.de>
int Manage_add(int fd, int tfd, struct mddev_dev *dv,
struct supertype *tst, mdu_array_info_t *array,
int force, int verbose, char *devname,
int Manage_add(int fd, int tfd, struct mddev_dev *dv,
struct supertype *tst, mdu_array_info_t *array,
int force, int verbose, char *devname,
- char *update, unsigned long rdev, unsigned long long array_size)
+ char *update, unsigned long rdev, unsigned long long array_size,
+ int raid_slot)
{
unsigned long long ldsize;
struct supertype *dev_st = NULL;
{
unsigned long long ldsize;
struct supertype *dev_st = NULL;
}
disc.major = major(rdev);
disc.minor = minor(rdev);
}
disc.major = major(rdev);
disc.minor = minor(rdev);
+ if (raid_slot < 0)
+ disc.number = j;
+ else
+ disc.number = raid_slot;
disc.state = 0;
if (array->not_persistent==0) {
int dfd;
disc.state = 0;
if (array->not_persistent==0) {
int dfd;
+
+ if (array->state & (1 << MD_SB_CLUSTERED)) {
+ if (dv->disposition == 'c')
+ disc.state |= (1 << MD_DISK_CANDIDATE);
+ else
+ disc.state |= (1 << MD_DISK_CLUSTER_ADD);
+ }
+
if (dv->writemostly == 1)
disc.state |= (1 << MD_DISK_WRITEMOSTLY);
if (tst->ss->external) {
if (dv->writemostly == 1)
disc.state |= (1 << MD_DISK_WRITEMOSTLY);
if (tst->ss->external) {
* variant on 'A'
* 'F' - Another variant of 'A', where the device was faulty
* so must be removed from the array first.
* variant on 'A'
* 'F' - Another variant of 'A', where the device was faulty
* so must be removed from the array first.
+ * 'c' - confirm the device as found (for clustered environments)
*
* For 'f' and 'r', the device can also be a kernel-internal
* name such as 'sdb'.
*
* For 'f' and 'r', the device can also be a kernel-internal
* name such as 'sdb'.
struct mdinfo info;
int frozen = 0;
int busy = 0;
struct mdinfo info;
int frozen = 0;
int busy = 0;
if (ioctl(fd, GET_ARRAY_INFO, &array)) {
pr_err("Cannot get array info for %s\n",
if (ioctl(fd, GET_ARRAY_INFO, &array)) {
pr_err("Cannot get array info for %s\n",
+ raid_slot = -1;
+ if (dv->disposition == 'c') {
+ rv = parse_cluster_confirm_arg(dv->devname,
+ &dv->devname,
+ &raid_slot);
+ if (!rv) {
+ pr_err("Could not get the devname of cluster\n");
+ goto abort;
+ }
+ }
+
if (strcmp(dv->devname, "failed") == 0 ||
strcmp(dv->devname, "faulty") == 0) {
if (dv->disposition != 'A'
if (strcmp(dv->devname, "failed") == 0 ||
strcmp(dv->devname, "faulty") == 0) {
if (dv->disposition != 'A'
if (strcmp(dv->devname, "missing") == 0) {
struct mddev_dev *add_devlist = NULL;
struct mddev_dev **dp;
if (strcmp(dv->devname, "missing") == 0) {
struct mddev_dev *add_devlist = NULL;
struct mddev_dev **dp;
+ if (dv->disposition == 'c') {
+ rv = ioctl(fd, CLUSTERED_DISK_NACK, NULL);
+ break;
+ }
+
if (dv->disposition != 'A') {
pr_err("'missing' only meaningful with --re-add\n");
goto abort;
if (dv->disposition != 'A') {
pr_err("'missing' only meaningful with --re-add\n");
goto abort;
case 'A':
case 'M': /* --re-add missing */
case 'F': /* --re-add faulty */
case 'A':
case 'M': /* --re-add missing */
case 'F': /* --re-add faulty */
+ case 'c': /* --cluster-confirm */
/* add the device */
if (subarray) {
pr_err("Cannot add disks to a \'member\' array, perform this operation on the parent container\n");
/* add the device */
if (subarray) {
pr_err("Cannot add disks to a \'member\' array, perform this operation on the parent container\n");
}
rv = Manage_add(fd, tfd, dv, tst, &array,
force, verbose, devname, update,
}
rv = Manage_add(fd, tfd, dv, tst, &array,
force, verbose, devname, update,
+ rdev, array_size, raid_slot);
close(tfd);
tfd = -1;
if (rv < 0)
close(tfd);
tfd = -1;
if (rv < 0)
{"wait", 0, 0, WaitOpt},
{"wait-clean", 0, 0, Waitclean },
{"action", 1, 0, Action },
{"wait", 0, 0, WaitOpt},
{"wait-clean", 0, 0, Waitclean },
{"action", 1, 0, Action },
+ {"cluster-confirm", 0, 0, ClusterConfirm},
/* For Detail/Examine */
{"brief", 0, 0, Brief},
/* For Detail/Examine */
{"brief", 0, 0, Brief},
#define MD_DISK_ACTIVE 1 /* disk is running but may not be in sync */
#define MD_DISK_SYNC 2 /* disk is in sync with the raid set */
#define MD_DISK_REMOVED 3 /* disk is in sync with the raid set */
#define MD_DISK_ACTIVE 1 /* disk is running but may not be in sync */
#define MD_DISK_SYNC 2 /* disk is in sync with the raid set */
#define MD_DISK_REMOVED 3 /* disk is in sync with the raid set */
+#define MD_DISK_CLUSTER_ADD 4 /* Initiate a disk add across the cluster
+ * For clustered environments only.
+ */
+#define MD_DISK_CANDIDATE 5 /* disk is added as spare (local) until confirmed
+ * For clustered environments only.
+ */
#define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" is RAID1 config.
* read requests will only be sent here in
#define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" is RAID1 config.
* read requests will only be sent here in
#define MD_SB_BLOCK_CONTAINER_RESHAPE 3 /* block container wide reshapes */
#define MD_SB_BLOCK_VOLUME 4 /* block activation of array, other arrays
* in container can be activated */
#define MD_SB_BLOCK_CONTAINER_RESHAPE 3 /* block container wide reshapes */
#define MD_SB_BLOCK_VOLUME 4 /* block activation of array, other arrays
* in container can be activated */
+#define MD_SB_CLUSTERED 5 /* MD is clustered */
#define MD_SB_BITMAP_PRESENT 8 /* bitmap may be present nearby */
typedef struct mdp_superblock_s {
#define MD_SB_BITMAP_PRESENT 8 /* bitmap may be present nearby */
typedef struct mdp_superblock_s {
#define STOP_ARRAY _IO (MD_MAJOR, 0x32)
#define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33)
#define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34)
#define STOP_ARRAY _IO (MD_MAJOR, 0x32)
#define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33)
#define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34)
+#define CLUSTERED_DISK_NACK _IO (MD_MAJOR, 0x35)
typedef struct mdu_version_s {
int major;
typedef struct mdu_version_s {
int major;
.BR \-\-readwrite
Subsequent devices that are added or re\-added will have the 'write-mostly'
flag cleared.
.BR \-\-readwrite
Subsequent devices that are added or re\-added will have the 'write-mostly'
flag cleared.
+.TP
+.BR \-\-cluster\-confirm
+Confirm the existence of the device. This is issued in response to an \-\-add
+request by a node in a cluster. When a node adds a device it sends a message
+to all nodes in the cluster to look for a device with a UUID. This translates
+to a udev notification with the UUID of the device to be added and the slot
+number. The receiving node must acknowledge this message
+with \-\-cluster\-confirm. Valid arguments are <slot>:<devicename> in case
+the device is found or <slot>:missing in case the device is not found.
.P
Each of these options requires that the first device listed is the array
.P
Each of these options requires that the first device listed is the array
case 'f':
case Fail:
case ReAdd: /* re-add */
case 'f':
case Fail:
case ReAdd: /* re-add */
if (!mode) {
newmode = MANAGE;
shortopt = short_bitmap_options;
if (!mode) {
newmode = MANAGE;
shortopt = short_bitmap_options;
* remove the device */
devmode = 'f';
continue;
* remove the device */
devmode = 'f';
continue;
+ case O(MANAGE, ClusterConfirm):
+ devmode = 'c';
+ continue;
case O(MANAGE,Replace):
/* Mark these devices for replacement */
devmode = 'R';
case O(MANAGE,Replace):
/* Mark these devices for replacement */
devmode = 'R';
Action,
Nodes,
ClusterName,
Action,
Nodes,
ClusterName,
};
enum prefix_standard {
};
enum prefix_standard {
extern int parse_layout_10(char *layout);
extern int parse_layout_faulty(char *layout);
extern long parse_num(char *num);
extern int parse_layout_10(char *layout);
extern int parse_layout_faulty(char *layout);
extern long parse_num(char *num);
+extern int parse_cluster_confirm_arg(char *inp, char **devname, int *slot);
extern int check_ext2(int fd, char *name);
extern int check_reiser(int fd, char *name);
extern int check_raid(int fd, char *name);
extern int check_ext2(int fd, char *name);
extern int check_reiser(int fd, char *name);
extern int check_raid(int fd, char *name);
/*
 * Parse a --cluster-confirm argument of the form "<slot>:<devname>".
 *
 * input:   the argument text; it is not modified.
 * devname: on success, set to point at the character following the ':'
 *          inside 'input' (no copy is made).
 * slot:    on success, set to the decimal slot number.
 *
 * Returns 0 on success and -1 when the argument is malformed, so callers
 * must treat a non-zero result as the error case.  On failure neither
 * *devname nor *slot is modified.
 *
 * An argument is malformed when it does not start with a decimal digit,
 * when the number does not fit in a non-negative int, when the number is
 * not followed by ':', or when nothing follows the ':'.
 */
int parse_cluster_confirm_arg(char *input, char **devname, int *slot)
{
	/* Largest value an int can hold, derived without <limits.h>. */
	const unsigned long slot_max = ~0U >> 1;
	unsigned long val;
	char *end;

	if (input == NULL || devname == NULL || slot == NULL)
		return -1;

	/*
	 * strtoul() skips leading white space and accepts a sign, silently
	 * negating the value; insist on a digit so "-1:dev" is rejected
	 * instead of wrapping around.
	 */
	if (input[0] < '0' || input[0] > '9')
		return -1;

	val = strtoul(input, &end, 10);
	/* strtoul() yields ULONG_MAX on overflow, which this also rejects. */
	if (val > slot_max)
		return -1;
	if (end[0] != ':' || end[1] == '\0')
		return -1;

	*slot = (int)val;
	*devname = end + 1;
	return 0;
}
+
void remove_partitions(int fd)
{
/* remove partitions from this block devices.
void remove_partitions(int fd)
{
/* remove partitions from this block devices.