]> git.ipfire.org Git - thirdparty/mdadm.git/blob - Manage.c
467efb73e367e330de1594ad03c9d116eee0b856
[thirdparty/mdadm.git] / Manage.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2001-2013 Neil Brown <neilb@suse.de>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@suse.de>
23 */
24
25 #include "mdadm.h"
26 #include "md_u.h"
27 #include "md_p.h"
28 #include <ctype.h>
29
30 int Manage_ro(char *devname, int fd, int readonly)
31 {
32 /* switch to readonly or rw
33 *
34 * requires >= 0.90.0
35 * first check that array is runing
36 * use RESTART_ARRAY_RW or STOP_ARRAY_RO
37 *
38 */
39 struct mdinfo *mdi;
40 int rv = 0;
41
42 /* If this is an externally-managed array, we need to modify the
43 * metadata_version so that mdmon doesn't undo our change.
44 */
45 mdi = sysfs_read(fd, NULL, GET_LEVEL|GET_VERSION);
46 if (mdi &&
47 mdi->array.major_version == -1 &&
48 is_subarray(mdi->text_version)) {
49 char vers[64];
50 strcpy(vers, "external:");
51 strcat(vers, mdi->text_version);
52 if (readonly > 0) {
53 int rv;
54 /* We set readonly ourselves. */
55 vers[9] = '-';
56 sysfs_set_str(mdi, NULL, "metadata_version", vers);
57
58 close(fd);
59 rv = sysfs_set_str(mdi, NULL, "array_state", "readonly");
60
61 if (rv < 0) {
62 pr_err("failed to set readonly for %s: %s\n",
63 devname, strerror(errno));
64
65 vers[9] = mdi->text_version[0];
66 sysfs_set_str(mdi, NULL, "metadata_version", vers);
67 rv = 1;
68 goto out;
69 }
70 } else {
71 char *cp;
72 /* We cannot set read/write - must signal mdmon */
73 vers[9] = '/';
74 sysfs_set_str(mdi, NULL, "metadata_version", vers);
75
76 cp = strchr(vers+10, '/');
77 if (cp)
78 *cp = 0;
79 ping_monitor(vers+10);
80 if (mdi->array.level <= 0)
81 sysfs_set_str(mdi, NULL, "array_state", "active");
82 }
83 goto out;
84 }
85
86 if (!md_array_active(fd)) {
87 pr_err("%s does not appear to be active.\n", devname);
88 rv = 1;
89 goto out;
90 }
91
92 if (readonly > 0) {
93 if (ioctl(fd, STOP_ARRAY_RO, NULL)) {
94 pr_err("failed to set readonly for %s: %s\n",
95 devname, strerror(errno));
96 rv = 1;
97 goto out;
98 }
99 } else if (readonly < 0) {
100 if (ioctl(fd, RESTART_ARRAY_RW, NULL)) {
101 pr_err("failed to set writable for %s: %s\n",
102 devname, strerror(errno));
103 rv = 1;
104 goto out;
105 }
106 }
107 out:
108 sysfs_free(mdi);
109 return rv;
110 }
111
/*
 * Remove names at 'path' - possibly with partition suffixes - which
 * are symlinks to the 'standard' name for devnm ("/dev/<devnm>" and
 * "/dev/<devnm>pN").  These were probably created by mdadm when the
 * array was assembled.
 *
 * The whole-device name and partition numbers 1..15 are tried.  A
 * name is unlinked only if it is a symlink whose target is exactly the
 * corresponding standard name.  Nothing is done when 'path' is NULL or
 * empty.
 */
static void remove_devices(char *devnm, char *path)
{
	char base[40];
	char *path2;
	char link[1024];
	int n;
	int part;
	int path_ends_in_digit;
	char *be;
	char *pe;

	/* An empty path has no last character to inspect and names nothing */
	if (!path || !*path)
		return;

	snprintf(base, sizeof(base), "/dev/%s", devnm);
	be = base + strlen(base);

	/* room for the longest suffix written below ("p15") plus NUL */
	path2 = xmalloc(strlen(path)+20);
	strcpy(path2, path);
	pe = path2 + strlen(path2);

	/* Suffixes are only ever written at 'pe' and beyond, so the last
	 * character of the original path never changes: test it once.
	 * The cast keeps isdigit() defined for negative 'char' values.
	 */
	path_ends_in_digit = isdigit((unsigned char)pe[-1]);

	for (part = 0; part < 16; part++) {
		if (part) {
			snprintf(be, sizeof(base) - (size_t)(be - base),
				 "p%d", part);

			/* a name ending in a digit needs a 'p' separator */
			if (path_ends_in_digit)
				sprintf(pe, "p%d", part);
			else
				sprintf(pe, "%d", part);
		}
		/* readlink() does not NUL-terminate, so compare by length */
		n = readlink(path2, link, sizeof(link));
		if (n > 0 && (int)strlen(base) == n &&
		    strncmp(link, base, n) == 0)
			unlink(path2);
	}
	free(path2);
}
154
/*
 * Manage_run - start an array that has already been configured.
 *
 * The md device name behind 'fd' is looked up and handed, together
 * with the context 'c', to IncrementalScan(), whose result is
 * returned.  If the name cannot be determined an error mentioning
 * 'devname' is printed and 1 is returned.
 */
int Manage_run(char *devname, int fd, struct context *c)
{
	char md_name[32];
	char *found = fd2devnm(fd);

	if (found != NULL) {
		/* Work on a private copy of the name.
		 * NOTE(review): presumably fd2devnm() returns storage that
		 * later calls may overwrite - confirm.
		 */
		strcpy(md_name, found);
		return IncrementalScan(c, md_name);
	}

	pr_err("Cannot find %s in sysfs!!\n", devname);
	return 1;
}
170
171 int Manage_stop(char *devname, int fd, int verbose, int will_retry)
172 {
173 /* Stop the array. Array must already be configured
174 * 'will_retry' means that error messages are not wanted.
175 */
176 int rv = 0;
177 struct map_ent *map = NULL;
178 struct mdinfo *mdi;
179 char devnm[32];
180 char container[32];
181 int err;
182 int count;
183 char buf[32];
184 unsigned long long rd1, rd2;
185
186 if (will_retry && verbose == 0)
187 verbose = -1;
188
189 strcpy(devnm, fd2devnm(fd));
190 /* Get EXCL access first. If this fails, then attempting
191 * to stop is probably a bad idea.
192 */
193 mdi = sysfs_read(fd, NULL, GET_LEVEL|GET_COMPONENT|GET_VERSION);
194 if (mdi && is_subarray(mdi->text_version)) {
195 char *sl;
196 strncpy(container, mdi->text_version+1, sizeof(container));
197 container[sizeof(container)-1] = 0;
198 sl = strchr(container, '/');
199 if (sl)
200 *sl = 0;
201 } else
202 container[0] = 0;
203 close(fd);
204 count = 5;
205 while (((fd = ((devname[0] == '/')
206 ?open(devname, O_RDONLY|O_EXCL)
207 :open_dev_flags(devnm, O_RDONLY|O_EXCL))) < 0
208 || strcmp(fd2devnm(fd), devnm) != 0)
209 && container[0]
210 && mdmon_running(container)
211 && count) {
212 /* Can't open, so something might be wrong. However it
213 * is a container, so we might be racing with mdmon, so
214 * retry for a bit.
215 */
216 if (fd >= 0)
217 close(fd);
218 flush_mdmon(container);
219 count--;
220 }
221 if (fd < 0 || strcmp(fd2devnm(fd), devnm) != 0) {
222 if (fd >= 0)
223 close(fd);
224 if (verbose >= 0)
225 pr_err("Cannot get exclusive access to %s:Perhaps a running process, mounted filesystem or active volume group?\n",
226 devname);
227 return 1;
228 }
229 /* If this is an mdmon managed array, just write 'inactive'
230 * to the array state and let mdmon clear up.
231 */
232 if (mdi &&
233 mdi->array.level > 0 &&
234 is_subarray(mdi->text_version)) {
235 int err;
236 /* This is mdmon managed. */
237 close(fd);
238
239 /* As we had an O_EXCL open, any use of the device
240 * which blocks STOP_ARRAY is probably a transient use,
241 * so it is reasonable to retry for a while - 5 seconds.
242 */
243 count = 25;
244 while (count &&
245 (err = sysfs_set_str(mdi, NULL,
246 "array_state",
247 "inactive")) < 0
248 && errno == EBUSY) {
249 usleep(200000);
250 count--;
251 }
252 if (err) {
253 if (verbose >= 0)
254 pr_err("failed to stop array %s: %s\n",
255 devname, strerror(errno));
256 rv = 1;
257 goto out;
258 }
259
260 /* Give monitor a chance to act */
261 ping_monitor(mdi->text_version);
262
263 fd = open_dev_excl(devnm);
264 if (fd < 0) {
265 if (verbose >= 0)
266 pr_err("failed to completely stop %s: Device is busy\n",
267 devname);
268 rv = 1;
269 goto out;
270 }
271 } else if (mdi &&
272 mdi->array.major_version == -1 &&
273 mdi->array.minor_version == -2 &&
274 !is_subarray(mdi->text_version)) {
275 struct mdstat_ent *mds, *m;
276 /* container, possibly mdmon-managed.
277 * Make sure mdmon isn't opening it, which
278 * would interfere with the 'stop'
279 */
280 ping_monitor(mdi->sys_name);
281
282 /* now check that there are no existing arrays
283 * which are members of this array
284 */
285 mds = mdstat_read(0, 0);
286 for (m = mds; m; m = m->next)
287 if (m->metadata_version &&
288 strncmp(m->metadata_version, "external:", 9)==0 &&
289 metadata_container_matches(m->metadata_version+9,
290 devnm)) {
291 if (verbose >= 0)
292 pr_err("Cannot stop container %s: member %s still active\n",
293 devname, m->devnm);
294 free_mdstat(mds);
295 rv = 1;
296 goto out;
297 }
298 }
299
300 /* If the array is undergoing a reshape which changes the number
301 * of devices, then it would be nice to stop it at a point where
302 * it has completed a full number of stripes in both old and
303 * new layouts as this will allow the reshape to be reverted.
304 * So if 'sync_action' is "reshape" and 'raid_disks' shows two
305 * different numbers, then
306 * - freeze reshape
307 * - set sync_max to next multiple of both data_disks and
308 * chunk sizes (or next but one)
309 * - unfreeze reshape
310 * - wait on 'sync_completed' for that point to be reached.
311 */
312 if (mdi && (mdi->array.level >= 4 && mdi->array.level <= 6) &&
313 sysfs_attribute_available(mdi, NULL, "sync_action") &&
314 sysfs_attribute_available(mdi, NULL, "reshape_direction") &&
315 sysfs_get_str(mdi, NULL, "sync_action", buf, 20) > 0 &&
316 strcmp(buf, "reshape\n") == 0 &&
317 sysfs_get_two(mdi, NULL, "raid_disks", &rd1, &rd2) == 2) {
318 unsigned long long position, curr;
319 unsigned long long chunk1, chunk2;
320 unsigned long long rddiv, chunkdiv;
321 unsigned long long sectors;
322 unsigned long long sync_max, old_sync_max;
323 unsigned long long completed;
324 int backwards = 0;
325 int delay;
326 int scfd;
327
328 delay = 40;
329 while (rd1 > rd2 && delay > 0 &&
330 sysfs_get_ll(mdi, NULL, "sync_max", &old_sync_max) == 0) {
331 /* must be in the critical section - wait a bit */
332 delay -= 1;
333 usleep(100000);
334 }
335
336 if (sysfs_set_str(mdi, NULL, "sync_action", "frozen") != 0)
337 goto done;
338 /* Array is frozen */
339
340 rd1 -= mdi->array.level == 6 ? 2 : 1;
341 rd2 -= mdi->array.level == 6 ? 2 : 1;
342 sysfs_get_str(mdi, NULL, "reshape_direction", buf, sizeof(buf));
343 if (strncmp(buf, "back", 4) == 0)
344 backwards = 1;
345 if (sysfs_get_ll(mdi, NULL, "reshape_position", &position) != 0) {
346 /* reshape must have finished now */
347 sysfs_set_str(mdi, NULL, "sync_action", "idle");
348 goto done;
349 }
350 sysfs_get_two(mdi, NULL, "chunk_size", &chunk1, &chunk2);
351 chunk1 /= 512;
352 chunk2 /= 512;
353 rddiv = GCD(rd1, rd2);
354 chunkdiv = GCD(chunk1, chunk2);
355 sectors = (chunk1/chunkdiv) * chunk2 * (rd1/rddiv) * rd2;
356
357 if (backwards) {
358 /* Need to subtract 'reshape_position' from
359 * array size to get equivalent of sync_max.
360 * Size calculation based on raid5_size in kernel.
361 */
362 unsigned long long size = mdi->component_size;
363 size &= ~(chunk1-1);
364 size &= ~(chunk2-1);
365 /* rd1 must be smaller */
366 /* Reshape may have progressed further backwards than
367 * recorded, so target even further back (hence "-1")
368 */
369 position = (position / sectors - 1) * sectors;
370 /* rd1 is always the conversion factor between 'sync'
371 * position and 'reshape' position.
372 * We read 1 "new" stripe worth of data from where-ever,
373 * and when write out that full stripe.
374 */
375 sync_max = size - position/rd1;
376 } else {
377 /* Reshape will very likely be beyond position, and it may
378 * be too late to stop at '+1', so aim for '+2'
379 */
380 position = (position / sectors + 2) * sectors;
381 sync_max = position/rd1;
382 }
383 if (sysfs_get_ll(mdi, NULL, "sync_max", &old_sync_max) < 0)
384 old_sync_max = mdi->component_size;
385 /* Must not advance sync_max as that could confuse
386 * the reshape monitor */
387 if (sync_max < old_sync_max)
388 sysfs_set_num(mdi, NULL, "sync_max", sync_max);
389 sysfs_set_str(mdi, NULL, "sync_action", "idle");
390
391 /* That should have set things going again. Now we
392 * wait a little while (3 second max) for sync_completed
393 * to reach the target.
394 * The reshape process can block for 500msec if
395 * the sync speed limit is hit, so we need to wait
396 * a lot longer than that. 1 second is usually
397 * enough. 3 is safe.
398 */
399 delay = 3000;
400 scfd = sysfs_open(mdi->sys_name, NULL, "sync_completed");
401 while (scfd >= 0 && delay > 0 && old_sync_max > 0) {
402 unsigned long long max_completed;
403 sysfs_get_ll(mdi, NULL, "reshape_position", &curr);
404 sysfs_fd_get_str(scfd, buf, sizeof(buf));
405 if (strncmp(buf, "none", 4) == 0) {
406 /* Either reshape has aborted, or hasn't
407 * quite started yet. Wait a bit and
408 * check 'sync_action' to see.
409 */
410 usleep(10000);
411 sysfs_get_str(mdi, NULL, "sync_action", buf, sizeof(buf));
412 if (strncmp(buf, "reshape", 7) != 0)
413 break;
414 }
415
416 if (sysfs_fd_get_two(scfd, &completed,
417 &max_completed) == 2 &&
418 /* 'completed' sometimes reads as max-uulong */
419 completed < max_completed &&
420 (completed > sync_max ||
421 (completed == sync_max && curr != position))) {
422 while (completed > sync_max) {
423 sync_max += sectors / rd1;
424 if (backwards)
425 position -= sectors;
426 else
427 position += sectors;
428 }
429 if (sync_max < old_sync_max)
430 sysfs_set_num(mdi, NULL, "sync_max", sync_max);
431 }
432
433 if (!backwards && curr >= position)
434 break;
435 if (backwards && curr <= position)
436 break;
437 sysfs_wait(scfd, &delay);
438 }
439 if (scfd >= 0)
440 close(scfd);
441
442 }
443 done:
444
445 /* As we have an O_EXCL open, any use of the device
446 * which blocks STOP_ARRAY is probably a transient use,
447 * so it is reasonable to retry for a while - 5 seconds.
448 */
449 count = 25; err = 0;
450 while (count && fd >= 0
451 && (err = ioctl(fd, STOP_ARRAY, NULL)) < 0
452 && errno == EBUSY) {
453 usleep(200000);
454 count --;
455 }
456 if (fd >= 0 && err) {
457 if (verbose >= 0) {
458 pr_err("failed to stop array %s: %s\n",
459 devname, strerror(errno));
460 if (errno == EBUSY)
461 cont_err("Perhaps a running process, mounted filesystem or active volume group?\n");
462 }
463 rv = 1;
464 goto out;
465 }
466
467 if (get_linux_version() < 2006028) {
468 /* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array
469 * was stopped, so We'll do it here just to be sure. Drop any
470 * partitions as well...
471 */
472 if (fd >= 0)
473 ioctl(fd, BLKRRPART, 0);
474 if (mdi)
475 sysfs_uevent(mdi, "change");
476 }
477
478 if (devnm[0] && use_udev()) {
479 struct map_ent *mp = map_by_devnm(&map, devnm);
480 remove_devices(devnm, mp ? mp->path : NULL);
481 }
482
483 if (verbose >= 0)
484 pr_err("stopped %s\n", devname);
485 map_lock(&map);
486 map_remove(&map, devnm);
487 map_unlock(&map);
488 out:
489 sysfs_free(mdi);
490
491 return rv;
492 }
493
494 static struct mddev_dev *add_one(struct mddev_dev *dv, char *name, char disp)
495 {
496 struct mddev_dev *new;
497 new = xmalloc(sizeof(*new));
498 memset(new, 0, sizeof(*new));
499 new->devname = xstrdup(name);
500 new->disposition = disp;
501 new->next = dv->next;
502 dv->next = new;
503 return new;
504 }
505
506 static void add_faulty(struct mddev_dev *dv, int fd, char disp)
507 {
508 mdu_array_info_t array;
509 mdu_disk_info_t disk;
510 int remaining_disks;
511 int i;
512
513 if (md_get_array_info(fd, &array) != 0)
514 return;
515
516 remaining_disks = array.nr_disks;
517 for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
518 char buf[40];
519 disk.number = i;
520 if (md_get_disk_info(fd, &disk) != 0)
521 continue;
522 if (disk.major == 0 && disk.minor == 0)
523 continue;
524 remaining_disks--;
525 if ((disk.state & 1) == 0) /* not faulty */
526 continue;
527 sprintf(buf, "%d:%d", disk.major, disk.minor);
528 dv = add_one(dv, buf, disp);
529 }
530 }
531
532 static void add_detached(struct mddev_dev *dv, int fd, char disp)
533 {
534 mdu_array_info_t array;
535 mdu_disk_info_t disk;
536 int remaining_disks;
537 int i;
538
539 if (md_get_array_info(fd, &array) != 0)
540 return;
541
542 remaining_disks = array.nr_disks;
543 for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
544 char buf[40];
545 int sfd;
546 disk.number = i;
547 if (md_get_disk_info(fd, &disk) != 0)
548 continue;
549 if (disk.major == 0 && disk.minor == 0)
550 continue;
551 remaining_disks--;
552 if (disp == 'f' && (disk.state & 1) != 0) /* already faulty */
553 continue;
554 sprintf(buf, "%d:%d", disk.major, disk.minor);
555 sfd = dev_open(buf, O_RDONLY);
556 if (sfd >= 0) {
557 /* Not detached */
558 close(sfd);
559 continue;
560 }
561 if (errno != ENXIO)
562 /* Probably not detached */
563 continue;
564 dv = add_one(dv, buf, disp);
565 }
566 }
567
568 static void add_set(struct mddev_dev *dv, int fd, char set_char)
569 {
570 mdu_array_info_t array;
571 mdu_disk_info_t disk;
572 int remaining_disks;
573 int copies, set;
574 int i;
575
576 if (md_get_array_info(fd, &array) != 0)
577 return;
578 if (array.level != 10)
579 return;
580 copies = ((array.layout & 0xff) *
581 ((array.layout >> 8) & 0xff));
582 if (array.raid_disks % copies)
583 return;
584
585 remaining_disks = array.nr_disks;
586 for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
587 char buf[40];
588 disk.number = i;
589 if (md_get_disk_info(fd, &disk) != 0)
590 continue;
591 if (disk.major == 0 && disk.minor == 0)
592 continue;
593 remaining_disks--;
594 set = disk.raid_disk % copies;
595 if (set_char != set + 'A')
596 continue;
597 sprintf(buf, "%d:%d", disk.major, disk.minor);
598 dv = add_one(dv, buf, dv->disposition);
599 }
600 }
601
602 int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
603 struct supertype *dev_st, struct supertype *tst,
604 unsigned long rdev,
605 char *update, char *devname, int verbose,
606 mdu_array_info_t *array)
607 {
608 struct mdinfo mdi;
609 int duuid[4];
610 int ouuid[4];
611
612 dev_st->ss->getinfo_super(dev_st, &mdi, NULL);
613 dev_st->ss->uuid_from_super(dev_st, ouuid);
614 if (tst->sb)
615 tst->ss->uuid_from_super(tst, duuid);
616 else
617 /* Assume uuid matches: kernel will check */
618 memcpy(duuid, ouuid, sizeof(ouuid));
619 if ((mdi.disk.state & (1<<MD_DISK_ACTIVE)) &&
620 !(mdi.disk.state & (1<<MD_DISK_FAULTY)) &&
621 memcmp(duuid, ouuid, sizeof(ouuid))==0) {
622 /* Looks like it is worth a
623 * try. Need to make sure
624 * kernel will accept it
625 * though.
626 */
627 mdu_disk_info_t disc;
628 /* re-add doesn't work for version-1 superblocks
629 * before 2.6.18 :-(
630 */
631 if (array->major_version == 1 &&
632 get_linux_version() <= 2006018)
633 goto skip_re_add;
634 disc.number = mdi.disk.number;
635 if (md_get_disk_info(fd, &disc) != 0 ||
636 disc.major != 0 || disc.minor != 0)
637 goto skip_re_add;
638 disc.major = major(rdev);
639 disc.minor = minor(rdev);
640 disc.number = mdi.disk.number;
641 disc.raid_disk = mdi.disk.raid_disk;
642 disc.state = mdi.disk.state;
643 if (array->state & (1 << MD_SB_CLUSTERED)) {
644 /* extra flags are needed when adding to a cluster as
645 * there are two cases to distinguish
646 */
647 if (dv->disposition == 'c')
648 disc.state |= (1 << MD_DISK_CANDIDATE);
649 else
650 disc.state |= (1 << MD_DISK_CLUSTER_ADD);
651 }
652 if (dv->writemostly == FlagSet)
653 disc.state |= 1 << MD_DISK_WRITEMOSTLY;
654 if (dv->writemostly == FlagClear)
655 disc.state &= ~(1 << MD_DISK_WRITEMOSTLY);
656 if (dv->failfast == FlagSet)
657 disc.state |= 1 << MD_DISK_FAILFAST;
658 if (dv->failfast == FlagClear)
659 disc.state &= ~(1 << MD_DISK_FAILFAST);
660 remove_partitions(tfd);
661 if (update || dv->writemostly != FlagDefault
662 || dv->failfast != FlagDefault) {
663 int rv = -1;
664 tfd = dev_open(dv->devname, O_RDWR);
665 if (tfd < 0) {
666 pr_err("failed to open %s for superblock update during re-add\n", dv->devname);
667 return -1;
668 }
669
670 if (dv->writemostly == FlagSet)
671 rv = dev_st->ss->update_super(
672 dev_st, NULL, "writemostly",
673 devname, verbose, 0, NULL);
674 if (dv->writemostly == FlagClear)
675 rv = dev_st->ss->update_super(
676 dev_st, NULL, "readwrite",
677 devname, verbose, 0, NULL);
678 if (dv->failfast == FlagSet)
679 rv = dev_st->ss->update_super(
680 dev_st, NULL, "failfast",
681 devname, verbose, 0, NULL);
682 if (dv->failfast == FlagClear)
683 rv = dev_st->ss->update_super(
684 dev_st, NULL, "nofailfast",
685 devname, verbose, 0, NULL);
686 if (update)
687 rv = dev_st->ss->update_super(
688 dev_st, NULL, update,
689 devname, verbose, 0, NULL);
690 if (rv == 0)
691 rv = dev_st->ss->store_super(dev_st, tfd);
692 close(tfd);
693 if (rv != 0) {
694 pr_err("failed to update superblock during re-add\n");
695 return -1;
696 }
697 }
698 /* don't even try if disk is marked as faulty */
699 errno = 0;
700 if (ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
701 if (verbose >= 0)
702 pr_err("re-added %s\n", dv->devname);
703 return 1;
704 }
705 if (errno == ENOMEM || errno == EROFS) {
706 pr_err("add new device failed for %s: %s\n",
707 dv->devname, strerror(errno));
708 if (dv->disposition == 'M')
709 return 0;
710 return -1;
711 }
712 }
713 skip_re_add:
714 return 0;
715 }
716
717 int Manage_add(int fd, int tfd, struct mddev_dev *dv,
718 struct supertype *tst, mdu_array_info_t *array,
719 int force, int verbose, char *devname,
720 char *update, unsigned long rdev, unsigned long long array_size,
721 int raid_slot)
722 {
723 unsigned long long ldsize;
724 struct supertype *dev_st;
725 int j;
726 mdu_disk_info_t disc;
727
728 if (!get_dev_size(tfd, dv->devname, &ldsize)) {
729 if (dv->disposition == 'M')
730 return 0;
731 else
732 return -1;
733 }
734
735 if (tst->ss == &super0 && ldsize > 4ULL*1024*1024*1024*1024) {
736 /* More than 4TB is wasted on v0.90 */
737 if (!force) {
738 pr_err("%s is larger than %s can effectively use.\n"
739 " Add --force is you really want to add this device.\n",
740 dv->devname, devname);
741 return -1;
742 }
743 pr_err("%s is larger than %s can effectively use.\n"
744 " Adding anyway as --force was given.\n",
745 dv->devname, devname);
746 }
747 if (!tst->ss->external && array->major_version == 0) {
748 if (ioctl(fd, HOT_ADD_DISK, rdev)==0) {
749 if (verbose >= 0)
750 pr_err("hot added %s\n",
751 dv->devname);
752 return 1;
753 }
754
755 pr_err("hot add failed for %s: %s\n",
756 dv->devname, strerror(errno));
757 return -1;
758 }
759
760 if (array->not_persistent == 0 || tst->ss->external) {
761
762 /* need to find a sample superblock to copy, and
763 * a spare slot to use.
764 * For 'external' array (well, container based),
765 * We can just load the metadata for the array->
766 */
767 int array_failed;
768 if (tst->sb)
769 /* already loaded */;
770 else if (tst->ss->external) {
771 tst->ss->load_container(tst, fd, NULL);
772 } else for (j = 0; j < tst->max_devs; j++) {
773 char *dev;
774 int dfd;
775 disc.number = j;
776 if (md_get_disk_info(fd, &disc))
777 continue;
778 if (disc.major==0 && disc.minor==0)
779 continue;
780 if ((disc.state & 4)==0) /* sync */
781 continue;
782 /* Looks like a good device to try */
783 dev = map_dev(disc.major, disc.minor, 1);
784 if (!dev)
785 continue;
786 dfd = dev_open(dev, O_RDONLY);
787 if (dfd < 0)
788 continue;
789 if (tst->ss->load_super(tst, dfd,
790 NULL)) {
791 close(dfd);
792 continue;
793 }
794 close(dfd);
795 break;
796 }
797 /* FIXME this is a bad test to be using */
798 if (!tst->sb && (dv->disposition != 'a'
799 && dv->disposition != 'S')) {
800 /* we are re-adding a device to a
801 * completely dead array - have to depend
802 * on kernel to check
803 */
804 } else if (!tst->sb) {
805 pr_err("cannot load array metadata from %s\n", devname);
806 return -1;
807 }
808
809 /* Make sure device is large enough */
810 if (dv->disposition != 'j' && /* skip size check for Journal */
811 tst->sb &&
812 tst->ss->avail_size(tst, ldsize/512, INVALID_SECTORS) <
813 array_size) {
814 if (dv->disposition == 'M')
815 return 0;
816 pr_err("%s not large enough to join array\n",
817 dv->devname);
818 return -1;
819 }
820
821 /* Possibly this device was recently part of
822 * the array and was temporarily removed, and
823 * is now being re-added. If so, we can
824 * simply re-add it.
825 */
826
827 if (array->not_persistent == 0) {
828 dev_st = dup_super(tst);
829 dev_st->ss->load_super(dev_st, tfd, NULL);
830 if (dev_st->sb && dv->disposition != 'S') {
831 int rv;
832
833 rv = attempt_re_add(fd, tfd, dv, dev_st, tst,
834 rdev, update, devname,
835 verbose, array);
836 dev_st->ss->free_super(dev_st);
837 if (rv)
838 return rv;
839 }
840 }
841 if (dv->disposition == 'M') {
842 if (verbose > 0)
843 pr_err("--re-add for %s to %s is not possible\n",
844 dv->devname, devname);
845 return 0;
846 }
847 if (dv->disposition == 'A') {
848 pr_err("--re-add for %s to %s is not possible\n",
849 dv->devname, devname);
850 return -1;
851 }
852 if (array->active_disks < array->raid_disks) {
853 char *avail = xcalloc(array->raid_disks, 1);
854 int d;
855 int found = 0;
856
857 for (d = 0; d < MAX_DISKS && found < array->nr_disks; d++) {
858 disc.number = d;
859 if (md_get_disk_info(fd, &disc))
860 continue;
861 if (disc.major == 0 && disc.minor == 0)
862 continue;
863 if (!(disc.state & (1<<MD_DISK_SYNC)))
864 continue;
865 avail[disc.raid_disk] = 1;
866 found++;
867 }
868 array_failed = !enough(array->level, array->raid_disks,
869 array->layout, 1, avail);
870 free(avail);
871 } else
872 array_failed = 0;
873 if (array_failed) {
874 pr_err("%s has failed so using --add cannot work and might destroy\n",
875 devname);
876 pr_err("data on %s. You should stop the array and re-assemble it.\n",
877 dv->devname);
878 return -1;
879 }
880 } else {
881 /* non-persistent. Must ensure that new drive
882 * is at least array->size big.
883 */
884 if (ldsize/512 < array_size) {
885 pr_err("%s not large enough to join array\n",
886 dv->devname);
887 return -1;
888 }
889 }
890 /* committed to really trying this device now*/
891 remove_partitions(tfd);
892
893 /* in 2.6.17 and earlier, version-1 superblocks won't
894 * use the number we write, but will choose a free number.
895 * we must choose the same free number, which requires
896 * starting at 'raid_disks' and counting up
897 */
898 for (j = array->raid_disks; j < tst->max_devs; j++) {
899 disc.number = j;
900 if (md_get_disk_info(fd, &disc))
901 break;
902 if (disc.major==0 && disc.minor==0)
903 break;
904 if (disc.state & 8) /* removed */
905 break;
906 }
907 disc.major = major(rdev);
908 disc.minor = minor(rdev);
909 if (raid_slot < 0)
910 disc.number = j;
911 else
912 disc.number = raid_slot;
913 disc.state = 0;
914
915 /* only add journal to array that supports journaling */
916 if (dv->disposition == 'j') {
917 struct mdinfo mdi;
918 struct mdinfo *mdp;
919
920 mdp = sysfs_read(fd, NULL, GET_ARRAY_STATE);
921 if (!mdp) {
922 pr_err("%s unable to read array state.\n", devname);
923 return -1;
924 }
925
926 if (mdp->array_state != ARRAY_READONLY) {
927 sysfs_free(mdp);
928 pr_err("%s is not readonly, cannot add journal.\n", devname);
929 return -1;
930 }
931
932 sysfs_free(mdp);
933
934 tst->ss->getinfo_super(tst, &mdi, NULL);
935 if (mdi.journal_device_required == 0) {
936 pr_err("%s does not support journal device.\n", devname);
937 return -1;
938 }
939 disc.raid_disk = 0;
940 }
941
942 if (array->not_persistent==0) {
943 int dfd;
944 if (dv->disposition == 'j')
945 disc.state |= (1 << MD_DISK_JOURNAL) | (1 << MD_DISK_SYNC);
946 if (dv->writemostly == FlagSet)
947 disc.state |= 1 << MD_DISK_WRITEMOSTLY;
948 if (dv->failfast == FlagSet)
949 disc.state |= 1 << MD_DISK_FAILFAST;
950 dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
951 if (tst->ss->add_to_super(tst, &disc, dfd,
952 dv->devname, INVALID_SECTORS))
953 return -1;
954 if (tst->ss->write_init_super(tst))
955 return -1;
956 } else if (dv->disposition == 'A') {
957 /* this had better be raid1.
958 * As we are "--re-add"ing we must find a spare slot
959 * to fill.
960 */
961 char *used = xcalloc(array->raid_disks, 1);
962 for (j = 0; j < tst->max_devs; j++) {
963 mdu_disk_info_t disc2;
964 disc2.number = j;
965 if (md_get_disk_info(fd, &disc2))
966 continue;
967 if (disc2.major==0 && disc2.minor==0)
968 continue;
969 if (disc2.state & 8) /* removed */
970 continue;
971 if (disc2.raid_disk < 0)
972 continue;
973 if (disc2.raid_disk > array->raid_disks)
974 continue;
975 used[disc2.raid_disk] = 1;
976 }
977 for (j = 0 ; j < array->raid_disks; j++)
978 if (!used[j]) {
979 disc.raid_disk = j;
980 disc.state |= (1<<MD_DISK_SYNC);
981 break;
982 }
983 free(used);
984 }
985
986 if (array->state & (1 << MD_SB_CLUSTERED)) {
987 if (dv->disposition == 'c')
988 disc.state |= (1 << MD_DISK_CANDIDATE);
989 else
990 disc.state |= (1 << MD_DISK_CLUSTER_ADD);
991 }
992
993 if (dv->writemostly == FlagSet)
994 disc.state |= (1 << MD_DISK_WRITEMOSTLY);
995 if (dv->failfast == FlagSet)
996 disc.state |= (1 << MD_DISK_FAILFAST);
997 if (tst->ss->external) {
998 /* add a disk
999 * to an external metadata container */
1000 struct mdinfo new_mdi;
1001 struct mdinfo *sra;
1002 int container_fd;
1003 char devnm[32];
1004 int dfd;
1005
1006 strcpy(devnm, fd2devnm(fd));
1007
1008 container_fd = open_dev_excl(devnm);
1009 if (container_fd < 0) {
1010 pr_err("add failed for %s: could not get exclusive access to container\n",
1011 dv->devname);
1012 tst->ss->free_super(tst);
1013 return -1;
1014 }
1015
1016 Kill(dv->devname, NULL, 0, -1, 0);
1017 dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
1018 if (mdmon_running(tst->container_devnm))
1019 tst->update_tail = &tst->updates;
1020 if (tst->ss->add_to_super(tst, &disc, dfd,
1021 dv->devname, INVALID_SECTORS)) {
1022 close(dfd);
1023 close(container_fd);
1024 return -1;
1025 }
1026 if (tst->update_tail)
1027 flush_metadata_updates(tst);
1028 else
1029 tst->ss->sync_metadata(tst);
1030
1031 sra = sysfs_read(container_fd, NULL, 0);
1032 if (!sra) {
1033 pr_err("add failed for %s: sysfs_read failed\n",
1034 dv->devname);
1035 close(container_fd);
1036 tst->ss->free_super(tst);
1037 return -1;
1038 }
1039 sra->array.level = LEVEL_CONTAINER;
1040 /* Need to set data_offset and component_size */
1041 tst->ss->getinfo_super(tst, &new_mdi, NULL);
1042 new_mdi.disk.major = disc.major;
1043 new_mdi.disk.minor = disc.minor;
1044 new_mdi.recovery_start = 0;
1045 /* Make sure fds are closed as they are O_EXCL which
1046 * would block add_disk */
1047 tst->ss->free_super(tst);
1048 if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
1049 pr_err("add new device to external metadata failed for %s\n", dv->devname);
1050 close(container_fd);
1051 sysfs_free(sra);
1052 return -1;
1053 }
1054 ping_monitor(devnm);
1055 sysfs_free(sra);
1056 close(container_fd);
1057 } else {
1058 tst->ss->free_super(tst);
1059 if (ioctl(fd, ADD_NEW_DISK, &disc)) {
1060 if (dv->disposition == 'j')
1061 pr_err("Failed to hot add %s as journal, "
1062 "please try restart %s.\n", dv->devname, devname);
1063 else
1064 pr_err("add new device failed for %s as %d: %s\n",
1065 dv->devname, j, strerror(errno));
1066 return -1;
1067 }
1068 if (dv->disposition == 'j') {
1069 pr_err("Journal added successfully, making %s read-write\n", devname);
1070 if (Manage_ro(devname, fd, -1))
1071 pr_err("Failed to make %s read-write\n", devname);
1072 }
1073
1074 }
1075 if (verbose >= 0)
1076 pr_err("added %s\n", dv->devname);
1077 return 1;
1078 }
1079
1080 int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
1081 int sysfd, unsigned long rdev, int force, int verbose, char *devname)
1082 {
1083 int lfd = -1;
1084 int err;
1085
1086 if (tst->ss->external) {
1087 /* To remove a device from a container, we must
1088 * check that it isn't in use in an array.
1089 * This involves looking in the 'holders'
1090 * directory - there must be just one entry,
1091 * the container.
1092 * To ensure that it doesn't get used as a
1093 * hot spare while we are checking, we
1094 * get an O_EXCL open on the container
1095 */
1096 int ret;
1097 char devnm[32];
1098 strcpy(devnm, fd2devnm(fd));
1099 lfd = open_dev_excl(devnm);
1100 if (lfd < 0) {
1101 pr_err("Cannot get exclusive access to container - odd\n");
1102 return -1;
1103 }
1104 /* We may not be able to check on holders in
1105 * sysfs, either because we don't have the dev num
1106 * (rdev == 0) or because the device has been detached
1107 * and the 'holders' directory no longer exists
1108 * (ret == -1). In that case, assume it is OK to
1109 * remove.
1110 */
1111 if (rdev == 0)
1112 ret = -1;
1113 else {
1114 /*
1115 * The drive has already been set to 'faulty', however
1116 * monitor might not have had time to process it and the
1117 * drive might still have an entry in the 'holders'
1118 * directory. Try a few times to avoid a false error
1119 */
1120 int count = 20;
1121
1122 do {
1123 ret = sysfs_unique_holder(devnm, rdev);
1124 if (ret < 2)
1125 break;
1126 usleep(100 * 1000); /* 100ms */
1127 } while (--count > 0);
1128
1129 if (ret == 0) {
1130 pr_err("%s is not a member, cannot remove.\n",
1131 dv->devname);
1132 close(lfd);
1133 return -1;
1134 }
1135 if (ret >= 2) {
1136 pr_err("%s is still in use, cannot remove.\n",
1137 dv->devname);
1138 close(lfd);
1139 return -1;
1140 }
1141 }
1142 }
1143 /* FIXME check that it is a current member */
1144 if (sysfd >= 0) {
1145 /* device has been removed and we don't know
1146 * the major:minor number
1147 */
1148 err = sys_hot_remove_disk(sysfd, force);
1149 } else {
1150 err = hot_remove_disk(fd, rdev, force);
1151 if (err && errno == ENODEV) {
1152 /* Old kernels rejected this if no personality
1153 * is registered */
1154 struct mdinfo *sra = sysfs_read(fd, NULL, GET_DEVS);
1155 struct mdinfo *dv = NULL;
1156 if (sra)
1157 dv = sra->devs;
1158 for ( ; dv ; dv=dv->next)
1159 if (dv->disk.major == (int)major(rdev) &&
1160 dv->disk.minor == (int)minor(rdev))
1161 break;
1162 if (dv)
1163 err = sysfs_set_str(sra, dv,
1164 "state", "remove");
1165 else
1166 err = -1;
1167 sysfs_free(sra);
1168 }
1169 }
1170 if (err) {
1171 pr_err("hot remove failed for %s: %s\n", dv->devname,
1172 strerror(errno));
1173 if (lfd >= 0)
1174 close(lfd);
1175 return -1;
1176 }
1177 if (tst->ss->external) {
1178 /*
1179 * Before dropping our exclusive open we make an
1180 * attempt at preventing mdmon from seeing an
1181 * 'add' event before reconciling this 'remove'
1182 * event.
1183 */
1184 char *devnm = fd2devnm(fd);
1185
1186 if (!devnm) {
1187 pr_err("unable to get container name\n");
1188 return -1;
1189 }
1190
1191 ping_manager(devnm);
1192 }
1193 if (lfd >= 0)
1194 close(lfd);
1195 if (verbose >= 0)
1196 pr_err("hot removed %s from %s\n",
1197 dv->devname, devname);
1198 return 1;
1199 }
1200
1201 int Manage_replace(struct supertype *tst, int fd, struct mddev_dev *dv,
1202 unsigned long rdev, int verbose, char *devname)
1203 {
1204 struct mdinfo *mdi, *di;
1205 if (tst->ss->external) {
1206 pr_err("--replace only supported for native metadata (0.90 or 1.x)\n");
1207 return -1;
1208 }
1209 /* Need to find the device in sysfs and add 'want_replacement' to the
1210 * status.
1211 */
1212 mdi = sysfs_read(fd, NULL, GET_DEVS);
1213 if (!mdi || !mdi->devs) {
1214 pr_err("Cannot find status of %s to enable replacement - strange\n",
1215 devname);
1216 return -1;
1217 }
1218 for (di = mdi->devs; di; di = di->next)
1219 if (di->disk.major == (int)major(rdev) &&
1220 di->disk.minor == (int)minor(rdev))
1221 break;
1222 if (di) {
1223 int rv;
1224 if (di->disk.raid_disk < 0) {
1225 pr_err("%s is not active and so cannot be replaced.\n",
1226 dv->devname);
1227 sysfs_free(mdi);
1228 return -1;
1229 }
1230 rv = sysfs_set_str(mdi, di,
1231 "state", "want_replacement");
1232 if (rv) {
1233 sysfs_free(mdi);
1234 pr_err("Failed to request replacement for %s\n",
1235 dv->devname);
1236 return -1;
1237 }
1238 if (verbose >= 0)
1239 pr_err("Marked %s (device %d in %s) for replacement\n",
1240 dv->devname, di->disk.raid_disk, devname);
1241 /* If there is a matching 'with', we need to tell it which
1242 * raid disk
1243 */
1244 while (dv && dv->disposition != 'W')
1245 dv = dv->next;
1246 if (dv) {
1247 dv->disposition = 'w';
1248 dv->used = di->disk.raid_disk;
1249 }
1250 return 1;
1251 }
1252 sysfs_free(mdi);
1253 pr_err("%s not found in %s so cannot --replace it\n",
1254 dv->devname, devname);
1255 return -1;
1256 }
1257
1258 int Manage_with(struct supertype *tst, int fd, struct mddev_dev *dv,
1259 unsigned long rdev, int verbose, char *devname)
1260 {
1261 struct mdinfo *mdi, *di;
1262 /* try to set 'slot' for 'rdev' in 'fd' to 'dv->used' */
1263 mdi = sysfs_read(fd, NULL, GET_DEVS|GET_STATE);
1264 if (!mdi || !mdi->devs) {
1265 pr_err("Cannot find status of %s to enable replacement - strange\n",
1266 devname);
1267 return -1;
1268 }
1269 for (di = mdi->devs; di; di = di->next)
1270 if (di->disk.major == (int)major(rdev) &&
1271 di->disk.minor == (int)minor(rdev))
1272 break;
1273 if (di) {
1274 int rv;
1275 if (di->disk.state & (1<<MD_DISK_FAULTY)) {
1276 pr_err("%s is faulty and cannot be a replacement\n",
1277 dv->devname);
1278 sysfs_free(mdi);
1279 return -1;
1280 }
1281 if (di->disk.raid_disk >= 0) {
1282 pr_err("%s is active and cannot be a replacement\n",
1283 dv->devname);
1284 sysfs_free(mdi);
1285 return -1;
1286 }
1287 rv = sysfs_set_num(mdi, di,
1288 "slot", dv->used);
1289 if (rv) {
1290 sysfs_free(mdi);
1291 pr_err("Failed to set %s as preferred replacement.\n",
1292 dv->devname);
1293 return -1;
1294 }
1295 if (verbose >= 0)
1296 pr_err("Marked %s in %s as replacement for device %d\n",
1297 dv->devname, devname, dv->used);
1298 return 1;
1299 }
1300 sysfs_free(mdi);
1301 pr_err("%s not found in %s so cannot make it preferred replacement\n",
1302 dv->devname, devname);
1303 return -1;
1304 }
1305
1306 int Manage_subdevs(char *devname, int fd,
1307 struct mddev_dev *devlist, int verbose, int test,
1308 char *update, int force)
1309 {
1310 /* Do something to each dev.
1311 * devmode can be
1312 * 'a' - add the device
1313 * try HOT_ADD_DISK
1314 * If that fails EINVAL, try ADD_NEW_DISK
1315 * 'S' - add the device as a spare - don't try re-add
1316 * 'j' - add the device as a journal device
1317 * 'A' - re-add the device
1318 * 'r' - remove the device: HOT_REMOVE_DISK
1319 * device can be 'faulty' or 'detached' in which case all
1320 * matching devices are removed.
1321 * 'f' - set the device faulty SET_DISK_FAULTY
1322 * device can be 'detached' in which case any device that
1323 * is inaccessible will be marked faulty.
1324 * 'R' - mark this device as wanting replacement.
1325 * 'W' - this device is added if necessary and activated as
1326 * a replacement for a previous 'R' device.
1327 * -----
1328 * 'w' - 'W' will be changed to 'w' when it is paired with
1329 * a 'R' device. If a 'W' is found while walking the list
1330 * it must be unpaired, and is an error.
1331 * 'M' - this is created by a 'missing' target. It is a slight
1332 * variant on 'A'
1333 * 'F' - Another variant of 'A', where the device was faulty
1334 * so must be removed from the array first.
1335 * 'c' - confirm the device as found (for clustered environments)
1336 *
1337 * For 'f' and 'r', the device can also be a kernel-internal
1338 * name such as 'sdb'.
1339 */
1340 mdu_array_info_t array;
1341 unsigned long long array_size;
1342 struct mddev_dev *dv;
1343 int tfd = -1;
1344 struct supertype *tst;
1345 char *subarray = NULL;
1346 int sysfd = -1;
1347 int count = 0; /* number of actions taken */
1348 struct mdinfo info;
1349 struct mdinfo devinfo;
1350 int frozen = 0;
1351 int busy = 0;
1352 int raid_slot = -1;
1353
1354 if (sysfs_init(&info, fd, NULL)) {
1355 pr_err("sysfs not availabile for %s\n", devname);
1356 goto abort;
1357 }
1358
1359 if (md_get_array_info(fd, &array)) {
1360 pr_err("Cannot get array info for %s\n", devname);
1361 goto abort;
1362 }
1363 /* array.size is only 32 bits and may be truncated.
1364 * So read from sysfs if possible, and record number of sectors
1365 */
1366
1367 array_size = get_component_size(fd);
1368 if (array_size <= 0)
1369 array_size = array.size * 2;
1370
1371 tst = super_by_fd(fd, &subarray);
1372 if (!tst) {
1373 pr_err("unsupport array - version %d.%d\n",
1374 array.major_version, array.minor_version);
1375 goto abort;
1376 }
1377
1378 for (dv = devlist; dv; dv = dv->next) {
1379 unsigned long rdev = 0; /* device to add/remove etc */
1380 int rv;
1381 int mj,mn;
1382
1383 raid_slot = -1;
1384 if (dv->disposition == 'c') {
1385 rv = parse_cluster_confirm_arg(dv->devname,
1386 &dv->devname,
1387 &raid_slot);
1388 if (rv) {
1389 pr_err("Could not get the devname of cluster\n");
1390 goto abort;
1391 }
1392 }
1393
1394 if (strcmp(dv->devname, "failed") == 0 ||
1395 strcmp(dv->devname, "faulty") == 0) {
1396 if (dv->disposition != 'A'
1397 && dv->disposition != 'r') {
1398 pr_err("%s only meaningful with -r or --re-add, not -%c\n",
1399 dv->devname, dv->disposition);
1400 goto abort;
1401 }
1402 add_faulty(dv, fd, (dv->disposition == 'A'
1403 ? 'F' : 'r'));
1404 continue;
1405 }
1406 if (strcmp(dv->devname, "detached") == 0) {
1407 if (dv->disposition != 'r' && dv->disposition != 'f') {
1408 pr_err("%s only meaningful with -r of -f, not -%c\n",
1409 dv->devname, dv->disposition);
1410 goto abort;
1411 }
1412 add_detached(dv, fd, dv->disposition);
1413 continue;
1414 }
1415
1416 if (strcmp(dv->devname, "missing") == 0) {
1417 struct mddev_dev *add_devlist;
1418 struct mddev_dev **dp;
1419 if (dv->disposition == 'c') {
1420 rv = ioctl(fd, CLUSTERED_DISK_NACK, NULL);
1421 break;
1422 }
1423
1424 if (dv->disposition != 'A') {
1425 pr_err("'missing' only meaningful with --re-add\n");
1426 goto abort;
1427 }
1428 add_devlist = conf_get_devs();
1429 if (add_devlist == NULL) {
1430 pr_err("no devices to scan for missing members.");
1431 continue;
1432 }
1433 for (dp = &add_devlist; *dp; dp = & (*dp)->next)
1434 /* 'M' (for 'missing') is like 'A' without errors */
1435 (*dp)->disposition = 'M';
1436 *dp = dv->next;
1437 dv->next = add_devlist;
1438 continue;
1439 }
1440
1441 if (strncmp(dv->devname, "set-", 4) == 0 &&
1442 strlen(dv->devname) == 5) {
1443 int copies;
1444
1445 if (dv->disposition != 'r' &&
1446 dv->disposition != 'f') {
1447 pr_err("'%s' only meaningful with -r or -f\n",
1448 dv->devname);
1449 goto abort;
1450 }
1451 if (array.level != 10) {
1452 pr_err("'%s' only meaningful with RAID10 arrays\n",
1453 dv->devname);
1454 goto abort;
1455 }
1456 copies = ((array.layout & 0xff) *
1457 ((array.layout >> 8) & 0xff));
1458 if (array.raid_disks % copies != 0 ||
1459 dv->devname[4] < 'A' ||
1460 dv->devname[4] >= 'A' + copies ||
1461 copies > 26) {
1462 pr_err("'%s' not meaningful with this array\n",
1463 dv->devname);
1464 goto abort;
1465 }
1466 add_set(dv, fd, dv->devname[4]);
1467 continue;
1468 }
1469
1470 if (strchr(dv->devname, '/') == NULL &&
1471 strchr(dv->devname, ':') == NULL &&
1472 strlen(dv->devname) < 50) {
1473 /* Assume this is a kernel-internal name like 'sda1' */
1474 int found = 0;
1475 char dname[55];
1476 if (dv->disposition != 'r' && dv->disposition != 'f') {
1477 pr_err("%s only meaningful with -r or -f, not -%c\n",
1478 dv->devname, dv->disposition);
1479 goto abort;
1480 }
1481
1482 sprintf(dname, "dev-%s", dv->devname);
1483 sysfd = sysfs_open(fd2devnm(fd), dname, "block/dev");
1484 if (sysfd >= 0) {
1485 char dn[20];
1486 if (sysfs_fd_get_str(sysfd, dn, 20) > 0 &&
1487 sscanf(dn, "%d:%d", &mj,&mn) == 2) {
1488 rdev = makedev(mj,mn);
1489 found = 1;
1490 }
1491 close(sysfd);
1492 sysfd = -1;
1493 }
1494 if (!found) {
1495 sysfd = sysfs_open(fd2devnm(fd), dname, "state");
1496 if (sysfd < 0) {
1497 pr_err("%s does not appear to be a component of %s\n",
1498 dv->devname, devname);
1499 goto abort;
1500 }
1501 }
1502 } else if ((dv->disposition == 'r' || dv->disposition == 'f')
1503 && get_maj_min(dv->devname, &mj, &mn)) {
1504 /* for 'fail' and 'remove', the device might
1505 * not exist.
1506 */
1507 rdev = makedev(mj, mn);
1508 } else {
1509 tfd = dev_open(dv->devname, O_RDONLY);
1510 if (tfd >= 0) {
1511 fstat_is_blkdev(tfd, dv->devname, &rdev);
1512 close(tfd);
1513 } else {
1514 int open_err = errno;
1515 if (!stat_is_blkdev(dv->devname, &rdev)) {
1516 if (dv->disposition == 'M')
1517 /* non-fatal. Also improbable */
1518 continue;
1519 goto abort;
1520 }
1521 if (dv->disposition == 'r')
1522 /* Be happy, the stat worked, that is
1523 * enough for --remove
1524 */
1525 ;
1526 else {
1527 if (dv->disposition == 'M')
1528 /* non-fatal */
1529 continue;
1530 pr_err("Cannot open %s: %s\n",
1531 dv->devname, strerror(open_err));
1532 goto abort;
1533 }
1534 }
1535 }
1536 switch(dv->disposition){
1537 default:
1538 pr_err("internal error - devmode[%s]=%d\n",
1539 dv->devname, dv->disposition);
1540 goto abort;
1541 case 'a':
1542 case 'S': /* --add-spare */
1543 case 'j': /* --add-journal */
1544 case 'A':
1545 case 'M': /* --re-add missing */
1546 case 'F': /* --re-add faulty */
1547 case 'c': /* --cluster-confirm */
1548 /* add the device */
1549 if (subarray) {
1550 pr_err("Cannot add disks to a \'member\' array, perform this operation on the parent container\n");
1551 goto abort;
1552 }
1553
1554 /* Let's first try to write re-add to sysfs */
1555 if (rdev != 0 &&
1556 (dv->disposition == 'A' || dv->disposition == 'F')) {
1557 sysfs_init_dev(&devinfo, rdev);
1558 if (sysfs_set_str(&info, &devinfo, "state", "re-add") == 0) {
1559 pr_err("re-add %s to %s succeed\n",
1560 dv->devname, info.sys_name);
1561 break;
1562 }
1563 }
1564
1565 if (dv->disposition == 'F')
1566 /* Need to remove first */
1567 hot_remove_disk(fd, rdev, force);
1568 /* Make sure it isn't in use (in 2.6 or later) */
1569 tfd = dev_open(dv->devname, O_RDONLY|O_EXCL);
1570 if (tfd >= 0) {
1571 /* We know no-one else is using it. We'll
1572 * need non-exclusive access to add it, so
1573 * do that now.
1574 */
1575 close(tfd);
1576 tfd = dev_open(dv->devname, O_RDONLY);
1577 }
1578 if (tfd < 0) {
1579 if (dv->disposition == 'M')
1580 continue;
1581 pr_err("Cannot open %s: %s\n",
1582 dv->devname, strerror(errno));
1583 goto abort;
1584 }
1585 if (!frozen) {
1586 if (sysfs_freeze_array(&info) == 1)
1587 frozen = 1;
1588 else
1589 frozen = -1;
1590 }
1591 rv = Manage_add(fd, tfd, dv, tst, &array,
1592 force, verbose, devname, update,
1593 rdev, array_size, raid_slot);
1594 close(tfd);
1595 tfd = -1;
1596 if (rv < 0)
1597 goto abort;
1598 if (rv > 0)
1599 count++;
1600 break;
1601
1602 case 'r':
1603 /* hot remove */
1604 if (subarray) {
1605 pr_err("Cannot remove disks from a \'member\' array, perform this operation on the parent container\n");
1606 rv = -1;
1607 } else
1608 rv = Manage_remove(tst, fd, dv, sysfd,
1609 rdev, verbose, force,
1610 devname);
1611 if (sysfd >= 0)
1612 close(sysfd);
1613 sysfd = -1;
1614 if (rv < 0)
1615 goto abort;
1616 if (rv > 0)
1617 count++;
1618 break;
1619
1620 case 'f': /* set faulty */
1621 /* FIXME check current member */
1622 if ((sysfd >= 0 && write(sysfd, "faulty", 6) != 6) ||
1623 (sysfd < 0 && ioctl(fd, SET_DISK_FAULTY,
1624 rdev))) {
1625 if (errno == EBUSY)
1626 busy = 1;
1627 pr_err("set device faulty failed for %s: %s\n",
1628 dv->devname, strerror(errno));
1629 if (sysfd >= 0)
1630 close(sysfd);
1631 goto abort;
1632 }
1633 if (sysfd >= 0)
1634 close(sysfd);
1635 sysfd = -1;
1636 count++;
1637 if (verbose >= 0)
1638 pr_err("set %s faulty in %s\n",
1639 dv->devname, devname);
1640 break;
1641 case 'R': /* Mark as replaceable */
1642 if (subarray) {
1643 pr_err("Cannot replace disks in a \'member\' array, perform this operation on the parent container\n");
1644 rv = -1;
1645 } else {
1646 if (!frozen) {
1647 if (sysfs_freeze_array(&info) == 1)
1648 frozen = 1;
1649 else
1650 frozen = -1;
1651 }
1652 rv = Manage_replace(tst, fd, dv,
1653 rdev, verbose,
1654 devname);
1655 }
1656 if (rv < 0)
1657 goto abort;
1658 if (rv > 0)
1659 count++;
1660 break;
1661 case 'W': /* --with device that doesn't match */
1662 pr_err("No matching --replace device for --with %s\n",
1663 dv->devname);
1664 goto abort;
1665 case 'w': /* --with device which was matched */
1666 rv = Manage_with(tst, fd, dv,
1667 rdev, verbose, devname);
1668 if (rv < 0)
1669 goto abort;
1670 break;
1671 }
1672 }
1673 if (frozen > 0)
1674 sysfs_set_str(&info, NULL, "sync_action","idle");
1675 if (test && count == 0)
1676 return 2;
1677 return 0;
1678
1679 abort:
1680 if (frozen > 0)
1681 sysfs_set_str(&info, NULL, "sync_action","idle");
1682 return !test && busy ? 2 : 1;
1683 }
1684
1685 int autodetect(void)
1686 {
1687 /* Open any md device, and issue the RAID_AUTORUN ioctl */
1688 int rv = 1;
1689 int fd = dev_open("9:0", O_RDONLY);
1690 if (fd >= 0) {
1691 if (ioctl(fd, RAID_AUTORUN, 0) == 0)
1692 rv = 0;
1693 close(fd);
1694 }
1695 return rv;
1696 }
1697
1698 int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident *ident, int verbose)
1699 {
1700 struct supertype supertype, *st = &supertype;
1701 int fd, rv = 2;
1702
1703 memset(st, 0, sizeof(*st));
1704
1705 fd = open_subarray(dev, subarray, st, verbose < 0);
1706 if (fd < 0)
1707 return 2;
1708
1709 if (!st->ss->update_subarray) {
1710 if (verbose >= 0)
1711 pr_err("Operation not supported for %s metadata\n",
1712 st->ss->name);
1713 goto free_super;
1714 }
1715
1716 if (mdmon_running(st->devnm))
1717 st->update_tail = &st->updates;
1718
1719 rv = st->ss->update_subarray(st, subarray, update, ident);
1720
1721 if (rv) {
1722 if (verbose >= 0)
1723 pr_err("Failed to update %s of subarray-%s in %s\n",
1724 update, subarray, dev);
1725 } else if (st->update_tail)
1726 flush_metadata_updates(st);
1727 else
1728 st->ss->sync_metadata(st);
1729
1730 if (rv == 0 && strcmp(update, "name") == 0 && verbose >= 0)
1731 pr_err("Updated subarray-%s name from %s, UUIDs may have changed\n",
1732 subarray, dev);
1733
1734 free_super:
1735 st->ss->free_super(st);
1736 close(fd);
1737
1738 return rv;
1739 }
1740
1741 /* Move spare from one array to another If adding to destination array fails
1742 * add back to original array.
1743 * Returns 1 on success, 0 on failure */
1744 int move_spare(char *from_devname, char *to_devname, dev_t devid)
1745 {
1746 struct mddev_dev devlist;
1747 char devname[20];
1748
1749 /* try to remove and add */
1750 int fd1 = open(to_devname, O_RDONLY);
1751 int fd2 = open(from_devname, O_RDONLY);
1752
1753 if (fd1 < 0 || fd2 < 0) {
1754 if (fd1>=0) close(fd1);
1755 if (fd2>=0) close(fd2);
1756 return 0;
1757 }
1758
1759 devlist.next = NULL;
1760 devlist.used = 0;
1761 devlist.writemostly = FlagDefault;
1762 devlist.failfast = FlagDefault;
1763 devlist.devname = devname;
1764 sprintf(devname, "%d:%d", major(devid), minor(devid));
1765
1766 devlist.disposition = 'r';
1767 if (Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0) == 0) {
1768 devlist.disposition = 'a';
1769 if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0, NULL, 0) == 0) {
1770 /* make sure manager is aware of changes */
1771 ping_manager(to_devname);
1772 ping_manager(from_devname);
1773 close(fd1);
1774 close(fd2);
1775 return 1;
1776 }
1777 else Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0);
1778 }
1779 close(fd1);
1780 close(fd2);
1781 return 0;
1782 }