]> git.ipfire.org Git - thirdparty/mdadm.git/blame - Manage.c
revert-reshape: only impose reshape_position tests on raid[456]
[thirdparty/mdadm.git] / Manage.c
CommitLineData
64c4757e 1/*
9a9dab36 2 * mdadm - manage Linux "md" devices aka RAID arrays.
64c4757e 3 *
7bd04da9 4 * Copyright (C) 2001-2012 Neil Brown <neilb@suse.de>
64c4757e
NB
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
e736b623 22 * Email: <neilb@suse.de>
64c4757e
NB
23 */
24
9a9dab36 25#include "mdadm.h"
682c7051
NB
26#include "md_u.h"
27#include "md_p.h"
4ccad7b1 28#include <ctype.h>
64c4757e 29
1011e834
N
/* ioctl request numbers built on MD_MAJOR; STOP_MD is issued by
 * Manage_stop() when the md driver reports a version below 0.90.0.
 */
#define REGISTER_DEV	_IO (MD_MAJOR, 1)
#define START_MD	_IO (MD_MAJOR, 2)
#define STOP_MD		_IO (MD_MAJOR, 3)
82b27616 33
64c4757e
NB
34int Manage_ro(char *devname, int fd, int readonly)
35{
682c7051
NB
36 /* switch to readonly or rw
37 *
38 * requires >= 0.90.0
39 * first check that array is runing
40 * use RESTART_ARRAY_RW or STOP_ARRAY_RO
41 *
42 */
43 mdu_array_info_t array;
0e600426 44#ifndef MDASSEMBLE
e9dd1598 45 struct mdinfo *mdi;
0e600426 46#endif
b73e45ae 47 int rv = 0;
aba69144 48
682c7051 49 if (md_get_version(fd) < 9000) {
e7b84f9d 50 pr_err("need md driver version 0.90.0 or later\n");
682c7051
NB
51 return 1;
52 }
0e600426 53#ifndef MDASSEMBLE
7bd04da9 54 /* If this is an externally-managed array, we need to modify the
e9dd1598
N
55 * metadata_version so that mdmon doesn't undo our change.
56 */
4dd2df09 57 mdi = sysfs_read(fd, NULL, GET_LEVEL|GET_VERSION);
e9dd1598
N
58 if (mdi &&
59 mdi->array.major_version == -1 &&
e9dd1598
N
60 is_subarray(mdi->text_version)) {
61 char vers[64];
62 strcpy(vers, "external:");
63 strcat(vers, mdi->text_version);
64 if (readonly > 0) {
65 int rv;
66 /* We set readonly ourselves. */
67 vers[9] = '-';
68 sysfs_set_str(mdi, NULL, "metadata_version", vers);
69
70 close(fd);
71 rv = sysfs_set_str(mdi, NULL, "array_state", "readonly");
72
73 if (rv < 0) {
e7b84f9d 74 pr_err("failed to set readonly for %s: %s\n",
e9dd1598
N
75 devname, strerror(errno));
76
77 vers[9] = mdi->text_version[0];
78 sysfs_set_str(mdi, NULL, "metadata_version", vers);
b73e45ae
JS
79 rv = 1;
80 goto out;
e9dd1598
N
81 }
82 } else {
83 char *cp;
84 /* We cannot set read/write - must signal mdmon */
85 vers[9] = '/';
86 sysfs_set_str(mdi, NULL, "metadata_version", vers);
87
88 cp = strchr(vers+10, '/');
1471b8b1 89 if (cp)
e9dd1598
N
90 *cp = 0;
91 ping_monitor(vers+10);
9ea5a252
DW
92 if (mdi->array.level <= 0)
93 sysfs_set_str(mdi, NULL, "array_state", "active");
e9dd1598 94 }
b73e45ae 95 goto out;
e9dd1598 96 }
0e600426 97#endif
682c7051 98 if (ioctl(fd, GET_ARRAY_INFO, &array)) {
e7b84f9d 99 pr_err("%s does not appear to be active.\n",
682c7051 100 devname);
b73e45ae
JS
101 rv = 1;
102 goto out;
682c7051 103 }
aba69144 104
7bd04da9 105 if (readonly > 0) {
682c7051 106 if (ioctl(fd, STOP_ARRAY_RO, NULL)) {
e7b84f9d 107 pr_err("failed to set readonly for %s: %s\n",
682c7051 108 devname, strerror(errno));
b73e45ae
JS
109 rv = 1;
110 goto out;
682c7051
NB
111 }
112 } else if (readonly < 0) {
113 if (ioctl(fd, RESTART_ARRAY_RW, NULL)) {
e7b84f9d 114 pr_err("failed to set writable for %s: %s\n",
682c7051 115 devname, strerror(errno));
b73e45ae
JS
116 rv = 1;
117 goto out;
682c7051
NB
118 }
119 }
b73e45ae
JS
120out:
121#ifndef MDASSEMBLE
122 if (mdi)
123 sysfs_free(mdi);
124#endif
125 return rv;
64c4757e
NB
126}
127
435d4ebb
LB
128#ifndef MDASSEMBLE
129
/*
 * Remove names at 'path' - possibly with partition suffixes - which
 * are symlinks to the 'standard' name "/dev/<devnm>" (or its matching
 * partition name).  These were probably created by mdadm when the
 * array was assembled.
 *
 * Partition 0 means the whole device; partitions 1..15 are tried with
 * a "pN" suffix on the /dev name, and with "pN" or "N" on 'path'
 * depending on whether 'path' ends in a digit.
 *
 * Nothing is done when 'path' is NULL or empty, or when devnm is too
 * long for the local buffer.
 */
static void remove_devices(char *devnm, char *path)
{
	char base[40];
	char *path2;
	char link[1024];
	int n;
	int part;
	int ends_in_digit;
	char *be;
	char *pe;

	/* an empty path has no last character to inspect */
	if (!path || !*path)
		return;

	n = snprintf(base, sizeof(base), "/dev/%s", devnm);
	/* keep room for the longest suffix written below ("p15" + NUL) */
	if (n < 0 || (size_t)n + 4 > sizeof(base))
		return;
	be = base + n;

	path2 = xmalloc(strlen(path)+20);
	strcpy(path2, path);
	pe = path2 + strlen(path2);
	/* <ctype.h> functions need an unsigned char value */
	ends_in_digit = isdigit((unsigned char)pe[-1]);

	for (part = 0; part < 16; part++) {
		if (part) {
			sprintf(be, "p%d", part);

			if (ends_in_digit)
				sprintf(pe, "p%d", part);
			else
				sprintf(pe, "%d", part);
		}
		n = readlink(path2, link, sizeof(link));
		/* readlink() does not NUL-terminate: compare by length */
		if (n > 0 && (int)strlen(base) == n &&
		    strncmp(link, base, n) == 0)
			unlink(path2);
	}
	free(path2);
}
4ccad7b1 172
fe7e0e64 173int Manage_run(char *devname, int fd, int verbose)
64c4757e 174{
fe7e0e64
N
175 /* Run the array. Array must already be configured
176 * Requires >= 0.90.0
682c7051 177 */
682c7051 178 mdu_param_t param; /* unused */
bccd8153 179 int rv = 0;
82b27616 180
fe7e0e64
N
181 if (md_get_version(fd) < 9000) {
182 pr_err("need md driver version 0.90.0 or later\n");
183 return 1;
184 }
185
186 if (ioctl(fd, RUN_ARRAY, &param)) {
187 if (verbose >= 0)
188 pr_err("failed to run array %s: %s\n",
189 devname, strerror(errno));
190 return 1;
191 }
192 if (verbose >= 0)
193 pr_err("started %s\n", devname);
194 return rv;
195}
196
197int Manage_stop(char *devname, int fd, int verbose, int will_retry)
198{
199 /* Stop the array. Array must already be configured
200 * 'will_retry' means that error messages are not wanted.
201 */
202 int rv = 0;
203 struct map_ent *map = NULL;
204 struct mdinfo *mdi;
205 char devnm[32];
206 char container[32];
207 int err;
208 int count;
2eba8496
N
209 char buf[32];
210 unsigned long long rd1, rd2;
fe7e0e64 211
ba728be7
N
212 if (will_retry && verbose == 0)
213 verbose = -1;
214
fe7e0e64 215 if (md_get_version(fd) < 9000) {
7bd04da9
N
216 if (ioctl(fd, STOP_MD, 0) == 0)
217 return 0;
218 pr_err("stopping device %s "
219 "failed: %s\n",
220 devname, strerror(errno));
221 return 1;
82b27616 222 }
aba69144 223
fe7e0e64
N
224 /* If this is an mdmon managed array, just write 'inactive'
225 * to the array state and let mdmon clear up.
226 */
227 strcpy(devnm, fd2devnm(fd));
228 /* Get EXCL access first. If this fails, then attempting
229 * to stop is probably a bad idea.
230 */
2eba8496 231 mdi = sysfs_read(fd, NULL, GET_LEVEL|GET_COMPONENT|GET_VERSION);
fe7e0e64
N
232 if (mdi && is_subarray(mdi->text_version)) {
233 char *sl;
234 strncpy(container, mdi->text_version+1, sizeof(container));
235 container[sizeof(container)-1] = 0;
236 sl = strchr(container, '/');
237 if (sl)
238 *sl = 0;
239 } else
240 container[0] = 0;
241 close(fd);
242 count = 5;
243 while (((fd = ((devnm[0] == '/')
244 ?open(devname, O_RDONLY|O_EXCL)
245 :open_dev_flags(devnm, O_RDONLY|O_EXCL))) < 0
246 || strcmp(fd2devnm(fd), devnm) != 0)
247 && container[0]
248 && mdmon_running(container)
249 && count) {
250 if (fd >= 0)
251 close(fd);
252 flush_mdmon(container);
253 count--;
682c7051 254 }
fe7e0e64
N
255 if (fd < 0 || strcmp(fd2devnm(fd), devnm) != 0) {
256 if (fd >= 0)
257 close(fd);
ba728be7 258 if (verbose >= 0)
fe7e0e64
N
259 pr_err("Cannot get exclusive access to %s:"
260 "Perhaps a running "
261 "process, mounted filesystem "
262 "or active volume group?\n",
263 devname);
264 return 1;
265 }
266 if (mdi &&
267 mdi->array.level > 0 &&
268 is_subarray(mdi->text_version)) {
eb0af526 269 int err;
fe7e0e64 270 /* This is mdmon managed. */
eb0af526 271 close(fd);
daf7a3ce 272
eb0af526
N
273 /* As we have an O_EXCL open, any use of the device
274 * which blocks STOP_ARRAY is probably a transient use,
275 * so it is reasonable to retry for a while - 5 seconds.
276 */
fe7e0e64
N
277 count = 25;
278 while (count &&
279 (err = sysfs_set_str(mdi, NULL,
280 "array_state",
281 "inactive")) < 0
eb0af526
N
282 && errno == EBUSY) {
283 usleep(200000);
fe7e0e64 284 count--;
eb0af526 285 }
fe7e0e64
N
286 if (err) {
287 if (verbose >= 0)
e7b84f9d
N
288 pr_err("failed to stop array %s: %s\n",
289 devname, strerror(errno));
bccd8153
JS
290 rv = 1;
291 goto out;
682c7051 292 }
fe7e0e64
N
293
294 /* Give monitor a chance to act */
295 ping_monitor(mdi->text_version);
296
297 fd = open_dev_excl(devnm);
298 if (fd < 0) {
299 if (verbose >= 0)
300 pr_err("failed to completely stop %s"
301 ": Device is busy\n",
302 devname);
303 rv = 1;
304 goto out;
305 }
306 } else if (mdi &&
307 mdi->array.major_version == -1 &&
308 mdi->array.minor_version == -2 &&
309 !is_subarray(mdi->text_version)) {
310 struct mdstat_ent *mds, *m;
311 /* container, possibly mdmon-managed.
312 * Make sure mdmon isn't opening it, which
313 * would interfere with the 'stop'
97590376 314 */
fe7e0e64 315 ping_monitor(mdi->sys_name);
daf7a3ce 316
fe7e0e64
N
317 /* now check that there are no existing arrays
318 * which are members of this array
319 */
320 mds = mdstat_read(0, 0);
321 for (m = mds; m; m = m->next)
322 if (m->metadata_version &&
323 strncmp(m->metadata_version, "external:", 9)==0 &&
324 metadata_container_matches(m->metadata_version+9,
325 devnm)) {
326 if (verbose >= 0)
327 pr_err("Cannot stop container %s: "
328 "member %s still active\n",
329 devname, m->dev);
330 free_mdstat(mds);
331 rv = 1;
332 goto out;
333 }
334 }
335
2eba8496
N
336 /* If the array is undergoing a reshape which changes the number
337 * of devices, then it would be nice to stop it at a point where
338 * it has completed a full number of stripes in both old and
339 * new layouts as this will allow the reshape to be reverted.
340 * So if 'sync_action' is "reshape" and 'raid_disks' shows two
341 * different numbers, then
342 * - freeze reshape
343 * - set sync_max to next multiple of both data_disks and
344 * chunk sizes (or next but one)
345 * - unfreeze reshape
346 * - wait on 'sync_completed' for that point to be reached.
347 */
348 if (mdi && (mdi->array.level >= 4 && mdi->array.level <= 6) &&
349 sysfs_attribute_available(mdi, NULL, "sync_action") &&
350 sysfs_attribute_available(mdi, NULL, "reshape_direction") &&
351 sysfs_get_str(mdi, NULL, "sync_action", buf, 20) > 0 &&
352 strcmp(buf, "reshape\n") == 0 &&
353 sysfs_get_two(mdi, NULL, "raid_disks", &rd1, &rd2) == 2 &&
354 sysfs_set_str(mdi, NULL, "sync_action", "frozen") == 0) {
355 /* Array is frozen */
356 unsigned long long position, curr;
357 unsigned long long chunk1, chunk2;
358 unsigned long long rddiv, chunkdiv;
359 unsigned long long sectors;
360 int backwards = 0;
361 int delay;
362 int scfd;
363
364 rd1 -= mdi->array.level == 6 ? 2 : 1;
365 rd2 -= mdi->array.level == 6 ? 2 : 1;
366 sysfs_get_str(mdi, NULL, "reshape_direction", buf, sizeof(buf));
367 if (strncmp(buf, "back", 4) == 0)
368 backwards = 1;
369 sysfs_get_ll(mdi, NULL, "reshape_position", &position);
370 sysfs_get_two(mdi, NULL, "chunk_size", &chunk1, &chunk2);
371 chunk1 /= 512;
372 chunk2 /= 512;
373 rddiv = GCD(rd1, rd2);
374 chunkdiv = GCD(chunk1, chunk2);
375 sectors = (chunk1/chunkdiv) * chunk2 * (rd1/rddiv) * rd2;
376
377 if (backwards) {
378 /* Need to subtract 'reshape_position' from
379 * array size to get equivalent of sync_max.
380 * Size calculation based on raid5_size in kernel.
381 */
382 unsigned long long size = mdi->component_size;
383 size &= ~(chunk1-1);
384 size &= ~(chunk2-1);
385 /* rd1 must be smaller */
386 size *= rd1;
387 position = size - position;
388 position = (position/sectors + 2) * sectors;
389 sysfs_set_num(mdi, NULL, "sync_max", position/rd1);
390 position = size - position;
391 } else {
392 position = (position/sectors + 2) * sectors;
393 sysfs_set_num(mdi, NULL, "sync_max", position/rd1);
394 }
395 sysfs_set_str(mdi, NULL, "sync_action", "idle");
396
397 /* That should have set things going again. Now we
398 * wait a little while (5 seconds) for sync_completed
399 * to reach the target.
400 */
401 delay = 500;
402 scfd = sysfs_open(mdi->sys_name, NULL, "sync_completed");
403 while (scfd >= 0 && delay > 0) {
404 sysfs_fd_get_str(scfd, buf, sizeof(buf));
405 if (strncmp(buf, "none", 4) == 0)
406 break;
407 sysfs_get_ll(mdi, NULL, "reshape_position", &curr);
408 if (!backwards && curr >= position)
409 break;
410 if (backwards && curr <= position)
411 break;
412 sysfs_wait(scfd, &delay);
413 }
414 if (scfd >= 0)
415 close(scfd);
416
417 }
418
fe7e0e64
N
419 /* As we have an O_EXCL open, any use of the device
420 * which blocks STOP_ARRAY is probably a transient use,
421 * so it is reasonable to retry for a while - 5 seconds.
422 */
423 count = 25; err = 0;
424 while (count && fd >= 0
425 && (err = ioctl(fd, STOP_ARRAY, NULL)) < 0
426 && errno == EBUSY) {
427 usleep(200000);
428 count --;
429 }
430 if (fd >= 0 && err) {
431 if (verbose >= 0) {
432 pr_err("failed to stop array %s: %s\n",
433 devname, strerror(errno));
434 if (errno == EBUSY)
435 cont_err("Perhaps a running "
436 "process, mounted filesystem "
437 "or active volume group?\n");
4ccad7b1 438 }
fe7e0e64
N
439 rv = 1;
440 goto out;
441 }
442 /* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array
443 * was stopped, so We'll do it here just to be sure. Drop any
444 * partitions as well...
445 */
446 if (fd >= 0)
447 ioctl(fd, BLKRRPART, 0);
448 if (mdi)
449 sysfs_uevent(mdi, "change");
4ccad7b1 450
fe7e0e64
N
451 if (devnm[0] && use_udev()) {
452 struct map_ent *mp = map_by_devnm(&map, devnm);
453 remove_devices(devnm, mp ? mp->path : NULL);
682c7051 454 }
fe7e0e64
N
455
456 if (verbose >= 0)
457 pr_err("stopped %s\n", devname);
458 map_lock(&map);
459 map_remove(&map, devnm);
460 map_unlock(&map);
461out:
462 if (mdi)
463 sysfs_free(mdi);
464
bccd8153 465 return rv;
64c4757e
NB
466}
467
5e73b024
N
468static struct mddev_dev *add_one(struct mddev_dev *dv, char *name, char disp)
469{
470 struct mddev_dev *new;
471 new = xmalloc(sizeof(*new));
472 memset(new, 0, sizeof(*new));
473 new->devname = xstrdup(name);
474 new->disposition = disp;
475 new->next = dv->next;
476 dv->next = new;
477 return new;
478}
479
1d997643
N
480static void add_faulty(struct mddev_dev *dv, int fd, char disp)
481{
482 mdu_array_info_t array;
483 mdu_disk_info_t disk;
484 int remaining_disks;
485 int i;
486
487 if (ioctl(fd, GET_ARRAY_INFO, &array) != 0)
488 return;
489
490 remaining_disks = array.nr_disks;
491 for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
1d997643
N
492 char buf[40];
493 disk.number = i;
494 if (ioctl(fd, GET_DISK_INFO, &disk) != 0)
495 continue;
496 if (disk.major == 0 && disk.minor == 0)
497 continue;
498 remaining_disks--;
499 if ((disk.state & 1) == 0) /* not faulty */
500 continue;
501 sprintf(buf, "%d:%d", disk.major, disk.minor);
5e73b024 502 dv = add_one(dv, buf, disp);
1d997643
N
503 }
504}
505
506static void add_detached(struct mddev_dev *dv, int fd, char disp)
507{
508 mdu_array_info_t array;
509 mdu_disk_info_t disk;
510 int remaining_disks;
511 int i;
512
513 if (ioctl(fd, GET_ARRAY_INFO, &array) != 0)
514 return;
515
516 remaining_disks = array.nr_disks;
517 for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
1d997643
N
518 char buf[40];
519 int sfd;
520 disk.number = i;
521 if (ioctl(fd, GET_DISK_INFO, &disk) != 0)
522 continue;
523 if (disk.major == 0 && disk.minor == 0)
524 continue;
525 remaining_disks--;
526 if (disp == 'f' && (disk.state & 1) != 0) /* already faulty */
527 continue;
528 sprintf(buf, "%d:%d", disk.major, disk.minor);
529 sfd = dev_open(buf, O_RDONLY);
530 if (sfd >= 0) {
531 /* Not detached */
532 close(sfd);
533 continue;
534 }
535 if (errno != ENXIO)
536 /* Probably not detached */
537 continue;
5e73b024 538 dv = add_one(dv, buf, disp);
1d997643
N
539 }
540}
541
64a78416
N
542static void add_set(struct mddev_dev *dv, int fd, char set_char)
543{
544 mdu_array_info_t array;
545 mdu_disk_info_t disk;
546 int remaining_disks;
547 int copies, set;
548 int i;
549
550 if (ioctl(fd, GET_ARRAY_INFO, &array) != 0)
551 return;
552 if (array.level != 10)
553 return;
554 copies = ((array.layout & 0xff) *
555 ((array.layout >> 8) & 0xff));
556 if (array.raid_disks % copies)
557 return;
558
559 remaining_disks = array.nr_disks;
560 for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
561 char buf[40];
562 disk.number = i;
563 if (ioctl(fd, GET_DISK_INFO, &disk) != 0)
564 continue;
565 if (disk.major == 0 && disk.minor == 0)
566 continue;
567 remaining_disks--;
568 set = disk.raid_disk % copies;
569 if (set_char != set + 'A')
570 continue;
571 sprintf(buf, "%d:%d", disk.major, disk.minor);
572 dv = add_one(dv, buf, dv->disposition);
573 }
574}
575
abe94694
N
576int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
577 struct supertype *dev_st, struct supertype *tst,
578 unsigned long rdev,
579 char *update, char *devname, int verbose,
580 mdu_array_info_t *array)
581{
582 struct mdinfo mdi;
583 int duuid[4];
584 int ouuid[4];
585
586 dev_st->ss->getinfo_super(dev_st, &mdi, NULL);
587 dev_st->ss->uuid_from_super(dev_st, ouuid);
588 if (tst->sb)
589 tst->ss->uuid_from_super(tst, duuid);
590 else
591 /* Assume uuid matches: kernel will check */
592 memcpy(duuid, ouuid, sizeof(ouuid));
593 if ((mdi.disk.state & (1<<MD_DISK_ACTIVE)) &&
594 !(mdi.disk.state & (1<<MD_DISK_FAULTY)) &&
595 memcmp(duuid, ouuid, sizeof(ouuid))==0) {
596 /* Looks like it is worth a
597 * try. Need to make sure
598 * kernel will accept it
599 * though.
600 */
601 mdu_disk_info_t disc;
602 /* re-add doesn't work for version-1 superblocks
603 * before 2.6.18 :-(
604 */
605 if (array->major_version == 1 &&
606 get_linux_version() <= 2006018)
607 goto skip_re_add;
608 disc.number = mdi.disk.number;
609 if (ioctl(fd, GET_DISK_INFO, &disc) != 0
610 || disc.major != 0 || disc.minor != 0
611 )
612 goto skip_re_add;
613 disc.major = major(rdev);
614 disc.minor = minor(rdev);
615 disc.number = mdi.disk.number;
616 disc.raid_disk = mdi.disk.raid_disk;
617 disc.state = mdi.disk.state;
618 if (dv->writemostly == 1)
619 disc.state |= 1 << MD_DISK_WRITEMOSTLY;
620 if (dv->writemostly == 2)
621 disc.state &= ~(1 << MD_DISK_WRITEMOSTLY);
622 remove_partitions(tfd);
623 if (update || dv->writemostly > 0) {
624 int rv = -1;
625 tfd = dev_open(dv->devname, O_RDWR);
626 if (tfd < 0) {
627 pr_err("failed to open %s for"
628 " superblock update during re-add\n", dv->devname);
629 return -1;
630 }
631
632 if (dv->writemostly == 1)
633 rv = dev_st->ss->update_super(
634 dev_st, NULL, "writemostly",
635 devname, verbose, 0, NULL);
636 if (dv->writemostly == 2)
637 rv = dev_st->ss->update_super(
638 dev_st, NULL, "readwrite",
639 devname, verbose, 0, NULL);
640 if (update)
641 rv = dev_st->ss->update_super(
642 dev_st, NULL, update,
643 devname, verbose, 0, NULL);
644 if (rv == 0)
645 rv = dev_st->ss->store_super(dev_st, tfd);
646 close(tfd);
647 if (rv != 0) {
648 pr_err("failed to update"
649 " superblock during re-add\n");
650 return -1;
651 }
652 }
653 /* don't even try if disk is marked as faulty */
654 errno = 0;
655 if (ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
656 if (verbose >= 0)
657 pr_err("re-added %s\n", dv->devname);
658 return 1;
659 }
660 if (errno == ENOMEM || errno == EROFS) {
661 pr_err("add new device failed for %s: %s\n",
662 dv->devname, strerror(errno));
663 if (dv->disposition == 'M')
664 return 0;
665 return -1;
666 }
667 }
668skip_re_add:
669 return 0;
670}
671
38aeaf3a
N
672int Manage_add(int fd, int tfd, struct mddev_dev *dv,
673 struct supertype *tst, mdu_array_info_t *array,
674 int force, int verbose, char *devname,
675 char *update, unsigned long rdev, unsigned long long array_size)
676{
677 unsigned long long ldsize;
6d43efb5 678 struct supertype *dev_st = NULL;
38aeaf3a
N
679 int j;
680 mdu_disk_info_t disc;
681
682 if (!get_dev_size(tfd, dv->devname, &ldsize)) {
683 if (dv->disposition == 'M')
684 return 0;
685 else
686 return -1;
687 }
688
689 if (tst->ss->validate_geometry(
690 tst, array->level, array->layout,
691 array->raid_disks, NULL,
af4348dd 692 ldsize >> 9, INVALID_SECTORS, NULL, NULL, 0) == 0) {
38aeaf3a
N
693 if (!force) {
694 pr_err("%s is larger than %s can "
695 "effectively use.\n"
696 " Add --force is you "
697 "really want to add this device.\n",
698 dv->devname, devname);
699 return -1;
700 }
701 pr_err("%s is larger than %s can "
702 "effectively use.\n"
703 " Adding anyway as --force "
704 "was given.\n",
705 dv->devname, devname);
706 }
707 if (!tst->ss->external &&
708 array->major_version == 0 &&
709 md_get_version(fd)%100 < 2) {
710 if (ioctl(fd, HOT_ADD_DISK, rdev)==0) {
711 if (verbose >= 0)
712 pr_err("hot added %s\n",
713 dv->devname);
714 return 1;
715 }
716
717 pr_err("hot add failed for %s: %s\n",
718 dv->devname, strerror(errno));
719 return -1;
720 }
721
722 if (array->not_persistent == 0 || tst->ss->external) {
723
724 /* need to find a sample superblock to copy, and
725 * a spare slot to use.
726 * For 'external' array (well, container based),
727 * We can just load the metadata for the array->
728 */
729 int array_failed;
730 if (tst->sb)
731 /* already loaded */;
732 else if (tst->ss->external) {
733 tst->ss->load_container(tst, fd, NULL);
734 } else for (j = 0; j < tst->max_devs; j++) {
735 char *dev;
736 int dfd;
737 disc.number = j;
738 if (ioctl(fd, GET_DISK_INFO, &disc))
739 continue;
740 if (disc.major==0 && disc.minor==0)
741 continue;
742 if ((disc.state & 4)==0) /* sync */
743 continue;
744 /* Looks like a good device to try */
745 dev = map_dev(disc.major, disc.minor, 1);
746 if (!dev)
747 continue;
748 dfd = dev_open(dev, O_RDONLY);
749 if (dfd < 0)
750 continue;
751 if (tst->ss->load_super(tst, dfd,
752 NULL)) {
753 close(dfd);
754 continue;
755 }
756 close(dfd);
757 break;
758 }
759 /* FIXME this is a bad test to be using */
760 if (!tst->sb && dv->disposition != 'a') {
761 /* we are re-adding a device to a
762 * completely dead array - have to depend
763 * on kernel to check
764 */
765 } else if (!tst->sb) {
766 pr_err("cannot load array metadata from %s\n", devname);
767 return -1;
768 }
769
770 /* Make sure device is large enough */
387fcd59 771 if (tst->ss->avail_size(tst, ldsize/512, INVALID_SECTORS) <
38aeaf3a
N
772 array_size) {
773 if (dv->disposition == 'M')
774 return 0;
775 pr_err("%s not large enough to join array\n",
776 dv->devname);
777 return -1;
778 }
779
780 /* Possibly this device was recently part of
781 * the array and was temporarily removed, and
782 * is now being re-added. If so, we can
783 * simply re-add it.
784 */
785
786 if (array->not_persistent==0) {
787 dev_st = dup_super(tst);
788 dev_st->ss->load_super(dev_st, tfd, NULL);
789 }
790 if (dev_st && dev_st->sb) {
791 int rv = attempt_re_add(fd, tfd, dv,
792 dev_st, tst,
793 rdev,
794 update, devname,
795 verbose,
796 array);
797 dev_st->ss->free_super(dev_st);
798 if (rv)
799 return rv;
800 }
801 if (dv->disposition == 'M') {
802 if (verbose > 0)
803 pr_err("--re-add for %s to %s is not possible\n",
804 dv->devname, devname);
805 return 0;
806 }
807 if (dv->disposition == 'A') {
808 pr_err("--re-add for %s to %s is not possible\n",
809 dv->devname, devname);
810 return -1;
811 }
812 if (array->active_disks < array->raid_disks) {
813 char *avail = xcalloc(array->raid_disks, 1);
814 int d;
815 int found = 0;
816
817 for (d = 0; d < MAX_DISKS && found < array->active_disks; d++) {
818 disc.number = d;
819 if (ioctl(fd, GET_DISK_INFO, &disc))
820 continue;
821 if (disc.major == 0 && disc.minor == 0)
822 continue;
823 if (!(disc.state & (1<<MD_DISK_SYNC)))
824 continue;
825 avail[disc.raid_disk] = 1;
826 found++;
827 }
828 array_failed = !enough(array->level, array->raid_disks,
829 array->layout, 1, avail);
830 } else
831 array_failed = 0;
832 if (array_failed) {
833 pr_err("%s has failed so using --add cannot work and might destroy\n",
834 devname);
835 pr_err("data on %s. You should stop the array and re-assemble it.\n",
836 dv->devname);
837 return -1;
838 }
839 } else {
840 /* non-persistent. Must ensure that new drive
841 * is at least array->size big.
842 */
843 if (ldsize/512 < array_size) {
844 pr_err("%s not large enough to join array\n",
845 dv->devname);
846 return -1;
847 }
848 }
849 /* committed to really trying this device now*/
850 remove_partitions(tfd);
851
852 /* in 2.6.17 and earlier, version-1 superblocks won't
853 * use the number we write, but will choose a free number.
854 * we must choose the same free number, which requires
855 * starting at 'raid_disks' and counting up
856 */
857 for (j = array->raid_disks; j < tst->max_devs; j++) {
858 disc.number = j;
859 if (ioctl(fd, GET_DISK_INFO, &disc))
860 break;
861 if (disc.major==0 && disc.minor==0)
862 break;
863 if (disc.state & 8) /* removed */
864 break;
865 }
866 disc.major = major(rdev);
867 disc.minor = minor(rdev);
868 disc.number =j;
869 disc.state = 0;
870 if (array->not_persistent==0) {
871 int dfd;
872 if (dv->writemostly == 1)
873 disc.state |= 1 << MD_DISK_WRITEMOSTLY;
874 dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
875 if (tst->ss->add_to_super(tst, &disc, dfd,
72ca9bcf 876 dv->devname, INVALID_SECTORS))
38aeaf3a
N
877 return -1;
878 if (tst->ss->write_init_super(tst))
879 return -1;
880 } else if (dv->disposition == 'A') {
881 /* this had better be raid1.
882 * As we are "--re-add"ing we must find a spare slot
883 * to fill.
884 */
885 char *used = xcalloc(array->raid_disks, 1);
886 for (j = 0; j < tst->max_devs; j++) {
887 mdu_disk_info_t disc2;
888 disc2.number = j;
889 if (ioctl(fd, GET_DISK_INFO, &disc2))
890 continue;
891 if (disc2.major==0 && disc2.minor==0)
892 continue;
893 if (disc2.state & 8) /* removed */
894 continue;
895 if (disc2.raid_disk < 0)
896 continue;
897 if (disc2.raid_disk > array->raid_disks)
898 continue;
899 used[disc2.raid_disk] = 1;
900 }
901 for (j = 0 ; j < array->raid_disks; j++)
902 if (!used[j]) {
903 disc.raid_disk = j;
904 disc.state |= (1<<MD_DISK_SYNC);
905 break;
906 }
907 free(used);
908 }
909 if (dv->writemostly == 1)
910 disc.state |= (1 << MD_DISK_WRITEMOSTLY);
911 if (tst->ss->external) {
912 /* add a disk
913 * to an external metadata container */
914 struct mdinfo new_mdi;
915 struct mdinfo *sra;
916 int container_fd;
4dd2df09 917 char devnm[32];
38aeaf3a
N
918 int dfd;
919
4dd2df09
N
920 strcpy(devnm, fd2devnm(fd));
921
922 container_fd = open_dev_excl(devnm);
38aeaf3a
N
923 if (container_fd < 0) {
924 pr_err("add failed for %s:"
925 " could not get exclusive access to container\n",
926 dv->devname);
927 tst->ss->free_super(tst);
928 return -1;
929 }
930
9cf9a1de 931 Kill(dv->devname, NULL, 0, -1, 0);
38aeaf3a 932 dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
4dd2df09 933 if (mdmon_running(tst->container_devnm))
38aeaf3a
N
934 tst->update_tail = &tst->updates;
935 if (tst->ss->add_to_super(tst, &disc, dfd,
72ca9bcf 936 dv->devname, INVALID_SECTORS)) {
38aeaf3a
N
937 close(dfd);
938 close(container_fd);
939 return -1;
940 }
941 if (tst->update_tail)
942 flush_metadata_updates(tst);
943 else
944 tst->ss->sync_metadata(tst);
945
4dd2df09 946 sra = sysfs_read(container_fd, NULL, 0);
38aeaf3a
N
947 if (!sra) {
948 pr_err("add failed for %s: sysfs_read failed\n",
949 dv->devname);
950 close(container_fd);
951 tst->ss->free_super(tst);
952 return -1;
953 }
954 sra->array.level = LEVEL_CONTAINER;
955 /* Need to set data_offset and component_size */
956 tst->ss->getinfo_super(tst, &new_mdi, NULL);
957 new_mdi.disk.major = disc.major;
958 new_mdi.disk.minor = disc.minor;
959 new_mdi.recovery_start = 0;
960 /* Make sure fds are closed as they are O_EXCL which
961 * would block add_disk */
962 tst->ss->free_super(tst);
963 if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
964 pr_err("add new device to external metadata"
965 " failed for %s\n", dv->devname);
966 close(container_fd);
967 sysfs_free(sra);
968 return -1;
969 }
4dd2df09 970 ping_monitor(devnm);
38aeaf3a
N
971 sysfs_free(sra);
972 close(container_fd);
973 } else {
974 tst->ss->free_super(tst);
975 if (ioctl(fd, ADD_NEW_DISK, &disc)) {
976 pr_err("add new device failed for %s as %d: %s\n",
977 dv->devname, j, strerror(errno));
978 return -1;
979 }
980 }
981 if (verbose >= 0)
982 pr_err("added %s\n", dv->devname);
983 return 1;
984}
985
d070235d
N
986int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
987 int sysfd, unsigned long rdev, int verbose, char *devname)
988{
989 int lfd = -1;
990 int err;
991
992 if (tst->ss->external) {
993 /* To remove a device from a container, we must
994 * check that it isn't in use in an array.
995 * This involves looking in the 'holders'
996 * directory - there must be just one entry,
997 * the container.
998 * To ensure that it doesn't get used as a
999 * hot spare while we are checking, we
1000 * get an O_EXCL open on the container
1001 */
aab15415 1002 int ret;
4dd2df09
N
1003 char devnm[32];
1004 strcpy(devnm, fd2devnm(fd));
1005 lfd = open_dev_excl(devnm);
d070235d
N
1006 if (lfd < 0) {
1007 pr_err("Cannot get exclusive access "
1008 " to container - odd\n");
1009 return -1;
1010 }
aab15415
N
1011 /* We may not be able to check on holders in
1012 * sysfs, either because we don't have the dev num
1013 * (rdev == 0) or because the device has been detached
1014 * and the 'holders' directory no longer exists
1015 * (ret == -1). In that case, assume it is OK to
1016 * remove.
d070235d 1017 */
aab15415
N
1018 if (rdev == 0)
1019 ret = -1;
1020 else
4dd2df09 1021 ret = sysfs_unique_holder(devnm, rdev);
aab15415
N
1022 if (ret == 0) {
1023 pr_err("%s is not a member, cannot remove.\n",
1024 dv->devname);
1025 close(lfd);
1026 return -1;
1027 }
1028 if (ret >= 2) {
1029 pr_err("%s is still in use, cannot remove.\n",
1030 dv->devname);
d070235d
N
1031 close(lfd);
1032 return -1;
1033 }
1034 }
1035 /* FIXME check that it is a current member */
1036 if (sysfd >= 0) {
1037 /* device has been removed and we don't know
1038 * the major:minor number
1039 */
1040 int n = write(sysfd, "remove", 6);
1041 if (n != 6)
1042 err = -1;
1043 else
1044 err = 0;
1045 } else {
1046 err = ioctl(fd, HOT_REMOVE_DISK, rdev);
1047 if (err && errno == ENODEV) {
1048 /* Old kernels rejected this if no personality
1049 * is registered */
4dd2df09 1050 struct mdinfo *sra = sysfs_read(fd, NULL, GET_DEVS);
d070235d
N
1051 struct mdinfo *dv = NULL;
1052 if (sra)
1053 dv = sra->devs;
1054 for ( ; dv ; dv=dv->next)
1055 if (dv->disk.major == (int)major(rdev) &&
1056 dv->disk.minor == (int)minor(rdev))
1057 break;
1058 if (dv)
1059 err = sysfs_set_str(sra, dv,
1060 "state", "remove");
1061 else
1062 err = -1;
1063 if (sra)
1064 sysfs_free(sra);
1065 }
1066 }
1067 if (err) {
1068 pr_err("hot remove failed "
1069 "for %s: %s\n", dv->devname,
1070 strerror(errno));
1071 if (lfd >= 0)
1072 close(lfd);
1073 return -1;
1074 }
1075 if (tst->ss->external) {
1076 /*
1077 * Before dropping our exclusive open we make an
1078 * attempt at preventing mdmon from seeing an
1079 * 'add' event before reconciling this 'remove'
1080 * event.
1081 */
4dd2df09 1082 char *devnm = fd2devnm(fd);
d070235d 1083
4dd2df09 1084 if (!devnm) {
d070235d
N
1085 pr_err("unable to get container name\n");
1086 return -1;
1087 }
1088
4dd2df09 1089 ping_manager(devnm);
d070235d
N
1090 }
1091 if (lfd >= 0)
1092 close(lfd);
1093 if (verbose >= 0)
1094 pr_err("hot removed %s from %s\n",
1095 dv->devname, devname);
1096 return 1;
1097}
1098
70c55e36
N
1099int Manage_replace(struct supertype *tst, int fd, struct mddev_dev *dv,
1100 unsigned long rdev, int verbose, char *devname)
1101{
1102 struct mdinfo *mdi, *di;
1103 if (tst->ss->external) {
1104 pr_err("--replace only supported for native metadata (0.90 or 1.x)\n");
1105 return -1;
1106 }
1107 /* Need to find the device in sysfs and add 'want_replacement' to the
1108 * status.
1109 */
4dd2df09 1110 mdi = sysfs_read(fd, NULL, GET_DEVS);
70c55e36
N
1111 if (!mdi || !mdi->devs) {
1112 pr_err("Cannot find status of %s to enable replacement - strange\n",
1113 devname);
1114 return -1;
1115 }
1116 for (di = mdi->devs; di; di = di->next)
1117 if (di->disk.major == (int)major(rdev) &&
1118 di->disk.minor == (int)minor(rdev))
1119 break;
1120 if (di) {
1121 int rv;
1122 if (di->disk.raid_disk < 0) {
1123 pr_err("%s is not active and so cannot be replaced.\n",
1124 dv->devname);
1125 sysfs_free(mdi);
1126 return -1;
1127 }
1128 rv = sysfs_set_str(mdi, di,
1129 "state", "want_replacement");
1130 if (rv) {
1131 sysfs_free(mdi);
1132 pr_err("Failed to request replacement for %s\n",
1133 dv->devname);
1134 return -1;
1135 }
1136 if (verbose >= 0)
1137 pr_err("Marked %s (device %d in %s) for replacement\n",
1138 dv->devname, di->disk.raid_disk, devname);
1139 /* If there is a matching 'with', we need to tell it which
1140 * raid disk
1141 */
1142 while (dv && dv->disposition != 'W')
1143 dv = dv->next;
1144 if (dv) {
1145 dv->disposition = 'w';
1146 dv->used = di->disk.raid_disk;
1147 }
1148 return 1;
1149 }
1150 sysfs_free(mdi);
1151 pr_err("%s not found in %s so cannot --replace it\n",
1152 dv->devname, devname);
1153 return -1;
1154}
1155
1156int Manage_with(struct supertype *tst, int fd, struct mddev_dev *dv,
1157 unsigned long rdev, int verbose, char *devname)
1158{
1159 struct mdinfo *mdi, *di;
1160 /* try to set 'slot' for 'rdev' in 'fd' to 'dv->used' */
4dd2df09 1161 mdi = sysfs_read(fd, NULL, GET_DEVS|GET_STATE);
70c55e36
N
1162 if (!mdi || !mdi->devs) {
1163 pr_err("Cannot find status of %s to enable replacement - strange\n",
1164 devname);
1165 return -1;
1166 }
1167 for (di = mdi->devs; di; di = di->next)
1168 if (di->disk.major == (int)major(rdev) &&
1169 di->disk.minor == (int)minor(rdev))
1170 break;
1171 if (di) {
1172 int rv;
1173 if (di->disk.state & (1<<MD_DISK_FAULTY)) {
1174 pr_err("%s is faulty and cannot be a replacement\n",
1175 dv->devname);
1176 sysfs_free(mdi);
1177 return -1;
1178 }
1179 if (di->disk.raid_disk >= 0) {
1180 pr_err("%s is active and cannot be a replacement\n",
1181 dv->devname);
1182 sysfs_free(mdi);
1183 return -1;
1184 }
1185 rv = sysfs_set_num(mdi, di,
1186 "slot", dv->used);
1187 if (rv) {
1188 sysfs_free(mdi);
1189 pr_err("Failed to %s as preferred replacement.\n",
1190 dv->devname);
1191 return -1;
1192 }
1193 if (verbose >= 0)
1194 pr_err("Marked %s in %s as replacement for device %d\n",
1195 dv->devname, devname, dv->used);
1196 return 1;
1197 }
1198 sysfs_free(mdi);
1199 pr_err("%s not found in %s so cannot make it preferred replacement\n",
1200 dv->devname, devname);
1201 return -1;
1202}
1203
64c4757e 1204int Manage_subdevs(char *devname, int fd,
833bb0f8 1205 struct mddev_dev *devlist, int verbose, int test,
11b391ec 1206 char *update, int force)
cd29a5c8 1207{
7bd04da9 1208 /* Do something to each dev.
682c7051
NB
1209 * devmode can be
1210 * 'a' - add the device
1211 * try HOT_ADD_DISK
1212 * If that fails EINVAL, try ADD_NEW_DISK
7bd04da9
N
1213 * 'A' - re-add the device
1214 * 'r' - remove the device: HOT_REMOVE_DISK
b80da661
NB
1215 * device can be 'faulty' or 'detached' in which case all
1216 * matching devices are removed.
682c7051 1217 * 'f' - set the device faulty SET_DISK_FAULTY
b80da661
NB
1218 * device can be 'detached' in which case any device that
1219 * is inaccessible will be marked faulty.
70c55e36
N
1220 * 'R' - mark this device as wanting replacement.
1221 * 'W' - this device is added if necessary and activated as
1222 * a replacement for a previous 'R' device.
1223 * -----
1224 * 'w' - 'W' will be changed to 'w' when it is paired with
1225 * a 'R' device. If a 'W' is found while walking the list
1226 * it must be unpaired, and is an error.
1227 * 'M' - this is created by a 'missing' target. It is a slight
1228 * variant on 'A'
262e3b7f
N
1229 * 'F' - Another variant of 'A', where the device was faulty
1230 * so must be removed from the array first.
70c55e36 1231 *
98d27e39
N
1232 * For 'f' and 'r', the device can also be a kernel-internal
1233 * name such as 'sdb'.
682c7051
NB
1234 */
1235 mdu_array_info_t array;
7a3be72f 1236 unsigned long long array_size;
1d997643 1237 struct mddev_dev *dv;
682c7051 1238 struct stat stb;
cfad27a9 1239 int tfd = -1;
38aeaf3a 1240 struct supertype *tst;
4725bc31 1241 char *subarray = NULL;
98d27e39 1242 int sysfd = -1;
7d2e6486 1243 int count = 0; /* number of actions taken */
9f584691
N
1244 struct mdinfo info;
1245 int frozen = 0;
8af530b0 1246 int busy = 0;
682c7051
NB
1247
1248 if (ioctl(fd, GET_ARRAY_INFO, &array)) {
7bd04da9 1249 pr_err("Cannot get array info for %s\n",
682c7051 1250 devname);
bcbb3112 1251 goto abort;
682c7051 1252 }
4dd2df09 1253 sysfs_init(&info, fd, NULL);
3da92f27 1254
7bd04da9 1255 /* array.size is only 32 bits and may be truncated.
7a3be72f
NB
1256 * So read from sysfs if possible, and record number of sectors
1257 */
1258
1259 array_size = get_component_size(fd);
1260 if (array_size <= 0)
1261 array_size = array.size * 2;
1262
4725bc31 1263 tst = super_by_fd(fd, &subarray);
3da92f27 1264 if (!tst) {
e7b84f9d 1265 pr_err("unsupport array - version %d.%d\n",
3da92f27 1266 array.major_version, array.minor_version);
bcbb3112 1267 goto abort;
3da92f27
NB
1268 }
1269
b3b4e8a7 1270 stb.st_rdev = 0;
38aeaf3a 1271 for (dv = devlist; dv; dv = dv->next) {
38aeaf3a 1272 int rv;
4a39c6f2 1273
1d997643
N
1274 if (strcmp(dv->devname, "failed") == 0 ||
1275 strcmp(dv->devname, "faulty") == 0) {
262e3b7f
N
1276 if (dv->disposition != 'A'
1277 && dv->disposition != 'r') {
e7b84f9d 1278 pr_err("%s only meaningful "
262e3b7f 1279 "with -r or --re-add, not -%c\n",
b80da661 1280 dv->devname, dv->disposition);
bcbb3112 1281 goto abort;
b80da661 1282 }
262e3b7f
N
1283 add_faulty(dv, fd, (dv->disposition == 'A'
1284 ? 'F' : 'r'));
1d997643
N
1285 continue;
1286 }
1287 if (strcmp(dv->devname, "detached") == 0) {
b80da661 1288 if (dv->disposition != 'r' && dv->disposition != 'f') {
e7b84f9d 1289 pr_err("%s only meaningful "
b80da661
NB
1290 "with -r of -f, not -%c\n",
1291 dv->devname, dv->disposition);
bcbb3112 1292 goto abort;
b80da661 1293 }
1d997643
N
1294 add_detached(dv, fd, dv->disposition);
1295 continue;
1296 }
1297
1298 if (strcmp(dv->devname, "missing") == 0) {
1299 struct mddev_dev *add_devlist = NULL;
1300 struct mddev_dev **dp;
c8e1a230 1301 if (dv->disposition != 'A') {
e7b84f9d 1302 pr_err("'missing' only meaningful "
aab15415 1303 "with --re-add\n");
bcbb3112 1304 goto abort;
a4e13010 1305 }
1d997643 1306 add_devlist = conf_get_devs();
a4e13010 1307 if (add_devlist == NULL) {
e7b84f9d 1308 pr_err("no devices to scan for missing members.");
a4e13010
N
1309 continue;
1310 }
1d997643 1311 for (dp = &add_devlist; *dp; dp = & (*dp)->next)
7bd04da9 1312 /* 'M' (for 'missing') is like 'A' without errors */
1d997643
N
1313 (*dp)->disposition = 'M';
1314 *dp = dv->next;
1315 dv->next = add_devlist;
1316 continue;
1317 }
1318
64a78416
N
1319 if (strncmp(dv->devname, "set-", 4) == 0 &&
1320 strlen(dv->devname) == 5) {
1321 int copies;
1322
1323 if (dv->disposition != 'r' &&
1324 dv->disposition != 'f') {
1325 pr_err("'%s' only meaningful with -r or -f\n",
1326 dv->devname);
1327 goto abort;
1328 }
1329 if (array.level != 10) {
1330 pr_err("'%s' only meaningful with RAID10 arrays\n",
1331 dv->devname);
1332 goto abort;
1333 }
1334 copies = ((array.layout & 0xff) *
1335 ((array.layout >> 8) & 0xff));
1336 if (array.raid_disks % copies != 0 ||
1337 dv->devname[4] < 'A' ||
1338 dv->devname[4] >= 'A' + copies ||
1339 copies > 26) {
1340 pr_err("'%s' not meaningful with this array\n",
1341 dv->devname);
1342 goto abort;
1343 }
1344 add_set(dv, fd, dv->devname[4]);
1345 continue;
1346 }
1347
1d997643 1348 if (strchr(dv->devname, '/') == NULL &&
aab15415
N
1349 strchr(dv->devname, ':') == NULL &&
1350 strlen(dv->devname) < 50) {
98d27e39
N
1351 /* Assume this is a kernel-internal name like 'sda1' */
1352 int found = 0;
1353 char dname[55];
1354 if (dv->disposition != 'r' && dv->disposition != 'f') {
e7b84f9d 1355 pr_err("%s only meaningful "
cfad27a9 1356 "with -r or -f, not -%c\n",
98d27e39 1357 dv->devname, dv->disposition);
bcbb3112 1358 goto abort;
98d27e39
N
1359 }
1360
1361 sprintf(dname, "dev-%s", dv->devname);
4dd2df09 1362 sysfd = sysfs_open(fd2devnm(fd), dname, "block/dev");
98d27e39
N
1363 if (sysfd >= 0) {
1364 char dn[20];
1365 int mj,mn;
1366 if (sysfs_fd_get_str(sysfd, dn, 20) > 0 &&
1367 sscanf(dn, "%d:%d", &mj,&mn) == 2) {
1368 stb.st_rdev = makedev(mj,mn);
1369 found = 1;
1370 }
1371 close(sysfd);
1372 sysfd = -1;
1373 }
1374 if (!found) {
4dd2df09 1375 sysfd = sysfs_open(fd2devnm(fd), dname, "state");
98d27e39 1376 if (sysfd < 0) {
e7b84f9d 1377 pr_err("%s does not appear "
98d27e39
N
1378 "to be a component of %s\n",
1379 dv->devname, devname);
bcbb3112 1380 goto abort;
98d27e39
N
1381 }
1382 }
b80da661 1383 } else {
c7b47447 1384 tfd = dev_open(dv->devname, O_RDONLY);
5fe7f5f7
N
1385 if (tfd >= 0)
1386 fstat(tfd, &stb);
5a9de8db 1387 else {
5fe7f5f7
N
1388 int open_err = errno;
1389 if (stat(dv->devname, &stb) != 0) {
1390 pr_err("Cannot find %s: %s\n",
1391 dv->devname, strerror(errno));
1392 goto abort;
1393 }
1394 if ((stb.st_mode & S_IFMT) != S_IFBLK) {
1395 if (dv->disposition == 'M')
1396 /* non-fatal. Also improbable */
1397 continue;
1398 pr_err("%s is not a block device.\n",
1399 dv->devname);
1400 goto abort;
1401 }
1402 if (dv->disposition == 'r')
1403 /* Be happy, the stat worked, that is
1404 * enough for --remove
1405 */
1406 ;
1407 else {
1d997643
N
1408 if (dv->disposition == 'M')
1409 /* non-fatal */
1410 continue;
839f27a3 1411 pr_err("Cannot open %s: %s\n",
5fe7f5f7 1412 dv->devname, strerror(open_err));
bcbb3112 1413 goto abort;
5a9de8db 1414 }
b80da661 1415 }
682c7051 1416 }
cd29a5c8 1417 switch(dv->disposition){
682c7051 1418 default:
e7b84f9d 1419 pr_err("internal error - devmode[%s]=%d\n",
c913b90e 1420 dv->devname, dv->disposition);
bcbb3112 1421 goto abort;
682c7051 1422 case 'a':
c8e1a230 1423 case 'A':
262e3b7f
N
1424 case 'M': /* --re-add missing */
1425 case 'F': /* --re-add faulty */
4a39c6f2 1426 /* add the device */
4725bc31 1427 if (subarray) {
e7b84f9d 1428 pr_err("Cannot add disks to a"
f7dd881f
DW
1429 " \'member\' array, perform this"
1430 " operation on the parent container\n");
bcbb3112 1431 goto abort;
f94d52f4 1432 }
262e3b7f
N
1433 if (dv->disposition == 'F')
1434 /* Need to remove first */
1435 ioctl(fd, HOT_REMOVE_DISK,
1436 (unsigned long)stb.st_rdev);
f277ce36 1437 /* Make sure it isn't in use (in 2.6 or later) */
1d997643 1438 tfd = dev_open(dv->devname, O_RDONLY|O_EXCL);
38aeaf3a
N
1439 if (tfd >= 0) {
1440 /* We know no-one else is using it. We'll
1441 * need non-exclusive access to add it, so
1442 * do that now.
1443 */
1444 close(tfd);
1445 tfd = dev_open(dv->devname, O_RDONLY);
1011e834 1446 }
0fbf459d 1447 if (tfd < 0) {
1d997643
N
1448 if (dv->disposition == 'M')
1449 continue;
e7b84f9d 1450 pr_err("Cannot open %s: %s\n",
d7eaf49f 1451 dv->devname, strerror(errno));
bcbb3112 1452 goto abort;
d7eaf49f 1453 }
9f584691
N
1454 if (!frozen) {
1455 if (sysfs_freeze_array(&info) == 1)
1456 frozen = 1;
1457 else
1458 frozen = -1;
1459 }
38aeaf3a
N
1460 rv = Manage_add(fd, tfd, dv, tst, &array,
1461 force, verbose, devname, update,
1462 stb.st_rdev, array_size);
1463 close(tfd);
1464 tfd = -1;
1465 if (rv < 0)
bcbb3112 1466 goto abort;
38aeaf3a
N
1467 if (rv > 0)
1468 count++;
682c7051
NB
1469 break;
1470
1471 case 'r':
1472 /* hot remove */
4725bc31 1473 if (subarray) {
e7b84f9d 1474 pr_err("Cannot remove disks from a"
f7dd881f
DW
1475 " \'member\' array, perform this"
1476 " operation on the parent container\n");
d070235d
N
1477 rv = -1;
1478 } else
1479 rv = Manage_remove(tst, fd, dv, sysfd,
1480 stb.st_rdev, verbose,
1481 devname);
1482 if (sysfd >= 0)
98d27e39 1483 close(sysfd);
d070235d
N
1484 sysfd = -1;
1485 if (rv < 0)
bcbb3112 1486 goto abort;
d070235d
N
1487 if (rv > 0)
1488 count++;
682c7051
NB
1489 break;
1490
1491 case 'f': /* set faulty */
1492 /* FIXME check current member */
98d27e39
N
1493 if ((sysfd >= 0 && write(sysfd, "faulty", 6) != 6) ||
1494 (sysfd < 0 && ioctl(fd, SET_DISK_FAULTY,
1495 (unsigned long) stb.st_rdev))) {
8af530b0
N
1496 if (errno == EBUSY)
1497 busy = 1;
e7b84f9d 1498 pr_err("set device faulty failed for %s: %s\n",
1d997643 1499 dv->devname, strerror(errno));
98d27e39
N
1500 if (sysfd >= 0)
1501 close(sysfd);
bcbb3112 1502 goto abort;
682c7051 1503 }
98d27e39
N
1504 if (sysfd >= 0)
1505 close(sysfd);
1506 sysfd = -1;
7d2e6486 1507 count++;
dab6685f 1508 if (verbose >= 0)
e7b84f9d 1509 pr_err("set %s faulty in %s\n",
1d997643 1510 dv->devname, devname);
682c7051 1511 break;
70c55e36
N
1512 case 'R': /* Mark as replaceable */
1513 if (subarray) {
1514 pr_err("Cannot replace disks in a"
1515 " \'member\' array, perform this"
1516 " operation on the parent container\n");
1517 rv = -1;
1518 } else {
1519 if (!frozen) {
1520 if (sysfs_freeze_array(&info) == 1)
1521 frozen = 1;
1522 else
1523 frozen = -1;
1524 }
1525 rv = Manage_replace(tst, fd, dv,
1526 stb.st_rdev, verbose,
1527 devname);
1528 }
1529 if (rv < 0)
1530 goto abort;
1531 if (rv > 0)
1532 count++;
1533 break;
1534 case 'W': /* --with device that doesn't match */
1535 pr_err("No matching --replace device for --with %s\n",
1536 dv->devname);
1537 goto abort;
1538 case 'w': /* --with device which was matched */
1539 rv = Manage_with(tst, fd, dv,
1540 stb.st_rdev, verbose, devname);
1541 if (rv < 0)
1542 goto abort;
1543 break;
682c7051
NB
1544 }
1545 }
9f584691
N
1546 if (frozen > 0)
1547 sysfs_set_str(&info, NULL, "sync_action","idle");
7d2e6486
N
1548 if (test && count == 0)
1549 return 2;
682c7051 1550 return 0;
bcbb3112
N
1551
1552abort:
9f584691
N
1553 if (frozen > 0)
1554 sysfs_set_str(&info, NULL, "sync_action","idle");
8af530b0 1555 return !test && busy ? 2 : 1;
64c4757e 1556}
1f48664b
NB
1557
1558int autodetect(void)
1559{
1560 /* Open any md device, and issue the RAID_AUTORUN ioctl */
1561 int rv = 1;
1562 int fd = dev_open("9:0", O_RDONLY);
1563 if (fd >= 0) {
1564 if (ioctl(fd, RAID_AUTORUN, 0) == 0)
1565 rv = 0;
1566 close(fd);
1567 }
1568 return rv;
1569}
aa534678 1570
ba728be7 1571int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident *ident, int verbose)
aa534678
DW
1572{
1573 struct supertype supertype, *st = &supertype;
1574 int fd, rv = 2;
1575
1576 memset(st, 0, sizeof(*st));
aa534678 1577
ba728be7 1578 fd = open_subarray(dev, subarray, st, verbose < 0);
aa534678
DW
1579 if (fd < 0)
1580 return 2;
1581
1582 if (!st->ss->update_subarray) {
ba728be7 1583 if (verbose >= 0)
e7b84f9d
N
1584 pr_err("Operation not supported for %s metadata\n",
1585 st->ss->name);
aa534678
DW
1586 goto free_super;
1587 }
1588
4dd2df09 1589 if (mdmon_running(st->devnm))
aa534678
DW
1590 st->update_tail = &st->updates;
1591
a951a4f7 1592 rv = st->ss->update_subarray(st, subarray, update, ident);
aa534678
DW
1593
1594 if (rv) {
ba728be7 1595 if (verbose >= 0)
e7b84f9d 1596 pr_err("Failed to update %s of subarray-%s in %s\n",
aa534678
DW
1597 update, subarray, dev);
1598 } else if (st->update_tail)
1599 flush_metadata_updates(st);
1600 else
1601 st->ss->sync_metadata(st);
1602
ba728be7 1603 if (rv == 0 && strcmp(update, "name") == 0 && verbose >= 0)
e7b84f9d
N
1604 pr_err("Updated subarray-%s name from %s, UUIDs may have changed\n",
1605 subarray, dev);
aa534678
DW
1606
1607 free_super:
1608 st->ss->free_super(st);
1609 close(fd);
1610
1611 return rv;
1612}
d52bb542 1613
7bd04da9
N
1614/* Move spare from one array to another If adding to destination array fails
1615 * add back to original array.
d52bb542
AC
1616 * Returns 1 on success, 0 on failure */
1617int move_spare(char *from_devname, char *to_devname, dev_t devid)
1618{
1619 struct mddev_dev devlist;
1620 char devname[20];
1621
1622 /* try to remove and add */
1623 int fd1 = open(to_devname, O_RDONLY);
1624 int fd2 = open(from_devname, O_RDONLY);
1625
1626 if (fd1 < 0 || fd2 < 0) {
1627 if (fd1>=0) close(fd1);
1628 if (fd2>=0) close(fd2);
1629 return 0;
1630 }
1631
1632 devlist.next = NULL;
1633 devlist.used = 0;
d52bb542
AC
1634 devlist.writemostly = 0;
1635 devlist.devname = devname;
1636 sprintf(devname, "%d:%d", major(devid), minor(devid));
1637
1638 devlist.disposition = 'r';
11b391ec 1639 if (Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0) == 0) {
d52bb542 1640 devlist.disposition = 'a';
11b391ec 1641 if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0, NULL, 0) == 0) {
d52bb542
AC
1642 /* make sure manager is aware of changes */
1643 ping_manager(to_devname);
1644 ping_manager(from_devname);
1645 close(fd1);
1646 close(fd2);
1647 return 1;
1648 }
11b391ec 1649 else Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0);
d52bb542
AC
1650 }
1651 close(fd1);
1652 close(fd2);
1653 return 0;
1654}
435d4ebb 1655#endif