]> git.ipfire.org Git - thirdparty/mdadm.git/blob - Manage.c
9e6913208ad28c690dc22cb9daba2a52f49f847b
[thirdparty/mdadm.git] / Manage.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2001-2013 Neil Brown <neilb@suse.de>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@suse.de>
23 */
24
25 #include "mdadm.h"
26 #include "md_u.h"
27 #include "md_p.h"
28 #include <ctype.h>
29
30 #define REGISTER_DEV _IO (MD_MAJOR, 1)
31 #define START_MD _IO (MD_MAJOR, 2)
32 #define STOP_MD _IO (MD_MAJOR, 3)
33
34 int Manage_ro(char *devname, int fd, int readonly)
35 {
36 /* switch to readonly or rw
37 *
38 * requires >= 0.90.0
39 * first check that array is runing
40 * use RESTART_ARRAY_RW or STOP_ARRAY_RO
41 *
42 */
43 mdu_array_info_t array;
44 #ifndef MDASSEMBLE
45 struct mdinfo *mdi;
46 #endif
47 int rv = 0;
48
49 #ifndef MDASSEMBLE
50 /* If this is an externally-managed array, we need to modify the
51 * metadata_version so that mdmon doesn't undo our change.
52 */
53 mdi = sysfs_read(fd, NULL, GET_LEVEL|GET_VERSION);
54 if (mdi &&
55 mdi->array.major_version == -1 &&
56 is_subarray(mdi->text_version)) {
57 char vers[64];
58 strcpy(vers, "external:");
59 strcat(vers, mdi->text_version);
60 if (readonly > 0) {
61 int rv;
62 /* We set readonly ourselves. */
63 vers[9] = '-';
64 sysfs_set_str(mdi, NULL, "metadata_version", vers);
65
66 close(fd);
67 rv = sysfs_set_str(mdi, NULL, "array_state", "readonly");
68
69 if (rv < 0) {
70 pr_err("failed to set readonly for %s: %s\n",
71 devname, strerror(errno));
72
73 vers[9] = mdi->text_version[0];
74 sysfs_set_str(mdi, NULL, "metadata_version", vers);
75 rv = 1;
76 goto out;
77 }
78 } else {
79 char *cp;
80 /* We cannot set read/write - must signal mdmon */
81 vers[9] = '/';
82 sysfs_set_str(mdi, NULL, "metadata_version", vers);
83
84 cp = strchr(vers+10, '/');
85 if (cp)
86 *cp = 0;
87 ping_monitor(vers+10);
88 if (mdi->array.level <= 0)
89 sysfs_set_str(mdi, NULL, "array_state", "active");
90 }
91 goto out;
92 }
93 #endif
94 if (md_get_array_info(fd, &array)) {
95 pr_err("%s does not appear to be active.\n",
96 devname);
97 rv = 1;
98 goto out;
99 }
100
101 if (readonly > 0) {
102 if (ioctl(fd, STOP_ARRAY_RO, NULL)) {
103 pr_err("failed to set readonly for %s: %s\n",
104 devname, strerror(errno));
105 rv = 1;
106 goto out;
107 }
108 } else if (readonly < 0) {
109 if (ioctl(fd, RESTART_ARRAY_RW, NULL)) {
110 pr_err("failed to set writable for %s: %s\n",
111 devname, strerror(errno));
112 rv = 1;
113 goto out;
114 }
115 }
116 out:
117 #ifndef MDASSEMBLE
118 sysfs_free(mdi);
119 #endif
120 return rv;
121 }
122
123 #ifndef MDASSEMBLE
124
static void remove_devices(char *devnm, char *path)
{
	/*
	 * Remove names at 'path' - possibly with
	 * partition suffixes - which link to the 'standard'
	 * name for devnm.  These were probably created
	 * by mdadm when the array was assembled.
	 *
	 * Only symlinks whose target is exactly "/dev/<devnm>" (or the
	 * matching "/dev/<devnm>pN" for partition N, 1 <= N <= 15) are
	 * unlinked.  A NULL or empty 'path' is ignored.
	 */
	char base[40];
	char *path2;
	char link[1024];
	int n;
	int part;
	size_t baselen;
	size_t pathlen;
	char *be;
	char *pe;

	/* An empty path has no last character to inspect and would
	 * make the partition names below plain relative names.
	 */
	if (!path || !*path)
		return;

	snprintf(base, sizeof(base), "/dev/%s", devnm);
	baselen = strlen(base);
	be = base + baselen;

	pathlen = strlen(path);
	path2 = xmalloc(pathlen + 20);
	strcpy(path2, path);
	pe = path2 + pathlen;

	for (part = 0; part < 16; part++) {
		if (part) {
			snprintf(be, sizeof(base) - baselen, "p%d", part);

			/* A name ending in a digit takes a "pN" suffix,
			 * any other name takes just "N".
			 */
			if (isdigit((unsigned char)pe[-1]))
				snprintf(pe, 20, "p%d", part);
			else
				snprintf(pe, 20, "%d", part);
		}
		n = readlink(path2, link, sizeof(link));
		/* readlink() does not NUL-terminate, so compare by length */
		if (n > 0 && (int)strlen(base) == n &&
		    strncmp(link, base, n) == 0)
			unlink(path2);
	}
	free(path2);
}
167
int Manage_run(char *devname, int fd, struct context *c)
{
	/* Run the array.  Array must already be configured
	 * Requires >= 0.90.0
	 *
	 * Looks up the md device name for 'fd' and hands it to
	 * IncrementalScan().  Returns 1 if the name cannot be found,
	 * otherwise whatever IncrementalScan() returns.
	 */
	char name_copy[32];
	char *found = fd2devnm(fd);

	if (found == NULL) {
		pr_err("Cannot find %s in sysfs!!\n", devname);
		return 1;
	}

	/* Work on a private copy of the name.
	 * NOTE(review): presumably fd2devnm() returns storage that a
	 * later call may overwrite - confirm against its definition.
	 */
	strcpy(name_copy, found);
	return IncrementalScan(c, name_copy);
}
183
184 int Manage_stop(char *devname, int fd, int verbose, int will_retry)
185 {
186 /* Stop the array. Array must already be configured
187 * 'will_retry' means that error messages are not wanted.
188 */
189 int rv = 0;
190 struct map_ent *map = NULL;
191 struct mdinfo *mdi;
192 char devnm[32];
193 char container[32];
194 int err;
195 int count;
196 char buf[32];
197 unsigned long long rd1, rd2;
198
199 if (will_retry && verbose == 0)
200 verbose = -1;
201
202 strcpy(devnm, fd2devnm(fd));
203 /* Get EXCL access first. If this fails, then attempting
204 * to stop is probably a bad idea.
205 */
206 mdi = sysfs_read(fd, NULL, GET_LEVEL|GET_COMPONENT|GET_VERSION);
207 if (mdi && is_subarray(mdi->text_version)) {
208 char *sl;
209 strncpy(container, mdi->text_version+1, sizeof(container));
210 container[sizeof(container)-1] = 0;
211 sl = strchr(container, '/');
212 if (sl)
213 *sl = 0;
214 } else
215 container[0] = 0;
216 close(fd);
217 count = 5;
218 while (((fd = ((devname[0] == '/')
219 ?open(devname, O_RDONLY|O_EXCL)
220 :open_dev_flags(devnm, O_RDONLY|O_EXCL))) < 0
221 || strcmp(fd2devnm(fd), devnm) != 0)
222 && container[0]
223 && mdmon_running(container)
224 && count) {
225 /* Can't open, so something might be wrong. However it
226 * is a container, so we might be racing with mdmon, so
227 * retry for a bit.
228 */
229 if (fd >= 0)
230 close(fd);
231 flush_mdmon(container);
232 count--;
233 }
234 if (fd < 0 || strcmp(fd2devnm(fd), devnm) != 0) {
235 if (fd >= 0)
236 close(fd);
237 if (verbose >= 0)
238 pr_err("Cannot get exclusive access to %s:Perhaps a running process, mounted filesystem or active volume group?\n",
239 devname);
240 return 1;
241 }
242 /* If this is an mdmon managed array, just write 'inactive'
243 * to the array state and let mdmon clear up.
244 */
245 if (mdi &&
246 mdi->array.level > 0 &&
247 is_subarray(mdi->text_version)) {
248 int err;
249 /* This is mdmon managed. */
250 close(fd);
251
252 /* As we had an O_EXCL open, any use of the device
253 * which blocks STOP_ARRAY is probably a transient use,
254 * so it is reasonable to retry for a while - 5 seconds.
255 */
256 count = 25;
257 while (count &&
258 (err = sysfs_set_str(mdi, NULL,
259 "array_state",
260 "inactive")) < 0
261 && errno == EBUSY) {
262 usleep(200000);
263 count--;
264 }
265 if (err) {
266 if (verbose >= 0)
267 pr_err("failed to stop array %s: %s\n",
268 devname, strerror(errno));
269 rv = 1;
270 goto out;
271 }
272
273 /* Give monitor a chance to act */
274 ping_monitor(mdi->text_version);
275
276 fd = open_dev_excl(devnm);
277 if (fd < 0) {
278 if (verbose >= 0)
279 pr_err("failed to completely stop %s: Device is busy\n",
280 devname);
281 rv = 1;
282 goto out;
283 }
284 } else if (mdi &&
285 mdi->array.major_version == -1 &&
286 mdi->array.minor_version == -2 &&
287 !is_subarray(mdi->text_version)) {
288 struct mdstat_ent *mds, *m;
289 /* container, possibly mdmon-managed.
290 * Make sure mdmon isn't opening it, which
291 * would interfere with the 'stop'
292 */
293 ping_monitor(mdi->sys_name);
294
295 /* now check that there are no existing arrays
296 * which are members of this array
297 */
298 mds = mdstat_read(0, 0);
299 for (m = mds; m; m = m->next)
300 if (m->metadata_version &&
301 strncmp(m->metadata_version, "external:", 9)==0 &&
302 metadata_container_matches(m->metadata_version+9,
303 devnm)) {
304 if (verbose >= 0)
305 pr_err("Cannot stop container %s: member %s still active\n",
306 devname, m->devnm);
307 free_mdstat(mds);
308 rv = 1;
309 goto out;
310 }
311 }
312
313 /* If the array is undergoing a reshape which changes the number
314 * of devices, then it would be nice to stop it at a point where
315 * it has completed a full number of stripes in both old and
316 * new layouts as this will allow the reshape to be reverted.
317 * So if 'sync_action' is "reshape" and 'raid_disks' shows two
318 * different numbers, then
319 * - freeze reshape
320 * - set sync_max to next multiple of both data_disks and
321 * chunk sizes (or next but one)
322 * - unfreeze reshape
323 * - wait on 'sync_completed' for that point to be reached.
324 */
325 if (mdi && (mdi->array.level >= 4 && mdi->array.level <= 6) &&
326 sysfs_attribute_available(mdi, NULL, "sync_action") &&
327 sysfs_attribute_available(mdi, NULL, "reshape_direction") &&
328 sysfs_get_str(mdi, NULL, "sync_action", buf, 20) > 0 &&
329 strcmp(buf, "reshape\n") == 0 &&
330 sysfs_get_two(mdi, NULL, "raid_disks", &rd1, &rd2) == 2) {
331 unsigned long long position, curr;
332 unsigned long long chunk1, chunk2;
333 unsigned long long rddiv, chunkdiv;
334 unsigned long long sectors;
335 unsigned long long sync_max, old_sync_max;
336 unsigned long long completed;
337 int backwards = 0;
338 int delay;
339 int scfd;
340
341 delay = 40;
342 while (rd1 > rd2 && delay > 0 &&
343 sysfs_get_ll(mdi, NULL, "sync_max", &old_sync_max) == 0) {
344 /* must be in the critical section - wait a bit */
345 delay -= 1;
346 usleep(100000);
347 }
348
349 if (sysfs_set_str(mdi, NULL, "sync_action", "frozen") != 0)
350 goto done;
351 /* Array is frozen */
352
353 rd1 -= mdi->array.level == 6 ? 2 : 1;
354 rd2 -= mdi->array.level == 6 ? 2 : 1;
355 sysfs_get_str(mdi, NULL, "reshape_direction", buf, sizeof(buf));
356 if (strncmp(buf, "back", 4) == 0)
357 backwards = 1;
358 if (sysfs_get_ll(mdi, NULL, "reshape_position", &position) != 0) {
359 /* reshape must have finished now */
360 sysfs_set_str(mdi, NULL, "sync_action", "idle");
361 goto done;
362 }
363 sysfs_get_two(mdi, NULL, "chunk_size", &chunk1, &chunk2);
364 chunk1 /= 512;
365 chunk2 /= 512;
366 rddiv = GCD(rd1, rd2);
367 chunkdiv = GCD(chunk1, chunk2);
368 sectors = (chunk1/chunkdiv) * chunk2 * (rd1/rddiv) * rd2;
369
370 if (backwards) {
371 /* Need to subtract 'reshape_position' from
372 * array size to get equivalent of sync_max.
373 * Size calculation based on raid5_size in kernel.
374 */
375 unsigned long long size = mdi->component_size;
376 size &= ~(chunk1-1);
377 size &= ~(chunk2-1);
378 /* rd1 must be smaller */
379 /* Reshape may have progressed further backwards than
380 * recorded, so target even further back (hence "-1")
381 */
382 position = (position / sectors - 1) * sectors;
383 /* rd1 is always the conversion factor between 'sync'
384 * position and 'reshape' position.
385 * We read 1 "new" stripe worth of data from where-ever,
386 * and when write out that full stripe.
387 */
388 sync_max = size - position/rd1;
389 } else {
390 /* Reshape will very likely be beyond position, and it may
391 * be too late to stop at '+1', so aim for '+2'
392 */
393 position = (position / sectors + 2) * sectors;
394 sync_max = position/rd1;
395 }
396 if (sysfs_get_ll(mdi, NULL, "sync_max", &old_sync_max) < 0)
397 old_sync_max = mdi->component_size;
398 /* Must not advance sync_max as that could confuse
399 * the reshape monitor */
400 if (sync_max < old_sync_max)
401 sysfs_set_num(mdi, NULL, "sync_max", sync_max);
402 sysfs_set_str(mdi, NULL, "sync_action", "idle");
403
404 /* That should have set things going again. Now we
405 * wait a little while (3 second max) for sync_completed
406 * to reach the target.
407 * The reshape process can block for 500msec if
408 * the sync speed limit is hit, so we need to wait
409 * a lot longer than that. 1 second is usually
410 * enough. 3 is safe.
411 */
412 delay = 3000;
413 scfd = sysfs_open(mdi->sys_name, NULL, "sync_completed");
414 while (scfd >= 0 && delay > 0 && old_sync_max > 0) {
415 unsigned long long max_completed;
416 sysfs_get_ll(mdi, NULL, "reshape_position", &curr);
417 sysfs_fd_get_str(scfd, buf, sizeof(buf));
418 if (strncmp(buf, "none", 4) == 0) {
419 /* Either reshape has aborted, or hasn't
420 * quite started yet. Wait a bit and
421 * check 'sync_action' to see.
422 */
423 usleep(10000);
424 sysfs_get_str(mdi, NULL, "sync_action", buf, sizeof(buf));
425 if (strncmp(buf, "reshape", 7) != 0)
426 break;
427 }
428
429 if (sysfs_fd_get_two(scfd, &completed,
430 &max_completed) == 2 &&
431 /* 'completed' sometimes reads as max-uulong */
432 completed < max_completed &&
433 (completed > sync_max ||
434 (completed == sync_max && curr != position))) {
435 while (completed > sync_max) {
436 sync_max += sectors / rd1;
437 if (backwards)
438 position -= sectors;
439 else
440 position += sectors;
441 }
442 if (sync_max < old_sync_max)
443 sysfs_set_num(mdi, NULL, "sync_max", sync_max);
444 }
445
446 if (!backwards && curr >= position)
447 break;
448 if (backwards && curr <= position)
449 break;
450 sysfs_wait(scfd, &delay);
451 }
452 if (scfd >= 0)
453 close(scfd);
454
455 }
456 done:
457
458 /* As we have an O_EXCL open, any use of the device
459 * which blocks STOP_ARRAY is probably a transient use,
460 * so it is reasonable to retry for a while - 5 seconds.
461 */
462 count = 25; err = 0;
463 while (count && fd >= 0
464 && (err = ioctl(fd, STOP_ARRAY, NULL)) < 0
465 && errno == EBUSY) {
466 usleep(200000);
467 count --;
468 }
469 if (fd >= 0 && err) {
470 if (verbose >= 0) {
471 pr_err("failed to stop array %s: %s\n",
472 devname, strerror(errno));
473 if (errno == EBUSY)
474 cont_err("Perhaps a running process, mounted filesystem or active volume group?\n");
475 }
476 rv = 1;
477 goto out;
478 }
479
480 if (get_linux_version() < 2006028) {
481 /* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array
482 * was stopped, so We'll do it here just to be sure. Drop any
483 * partitions as well...
484 */
485 if (fd >= 0)
486 ioctl(fd, BLKRRPART, 0);
487 if (mdi)
488 sysfs_uevent(mdi, "change");
489 }
490
491 if (devnm[0] && use_udev()) {
492 struct map_ent *mp = map_by_devnm(&map, devnm);
493 remove_devices(devnm, mp ? mp->path : NULL);
494 }
495
496 if (verbose >= 0)
497 pr_err("stopped %s\n", devname);
498 map_lock(&map);
499 map_remove(&map, devnm);
500 map_unlock(&map);
501 out:
502 sysfs_free(mdi);
503
504 return rv;
505 }
506
507 static struct mddev_dev *add_one(struct mddev_dev *dv, char *name, char disp)
508 {
509 struct mddev_dev *new;
510 new = xmalloc(sizeof(*new));
511 memset(new, 0, sizeof(*new));
512 new->devname = xstrdup(name);
513 new->disposition = disp;
514 new->next = dv->next;
515 dv->next = new;
516 return new;
517 }
518
519 static void add_faulty(struct mddev_dev *dv, int fd, char disp)
520 {
521 mdu_array_info_t array;
522 mdu_disk_info_t disk;
523 int remaining_disks;
524 int i;
525
526 if (md_get_array_info(fd, &array) != 0)
527 return;
528
529 remaining_disks = array.nr_disks;
530 for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
531 char buf[40];
532 disk.number = i;
533 if (md_get_disk_info(fd, &disk) != 0)
534 continue;
535 if (disk.major == 0 && disk.minor == 0)
536 continue;
537 remaining_disks--;
538 if ((disk.state & 1) == 0) /* not faulty */
539 continue;
540 sprintf(buf, "%d:%d", disk.major, disk.minor);
541 dv = add_one(dv, buf, disp);
542 }
543 }
544
545 static void add_detached(struct mddev_dev *dv, int fd, char disp)
546 {
547 mdu_array_info_t array;
548 mdu_disk_info_t disk;
549 int remaining_disks;
550 int i;
551
552 if (md_get_array_info(fd, &array) != 0)
553 return;
554
555 remaining_disks = array.nr_disks;
556 for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
557 char buf[40];
558 int sfd;
559 disk.number = i;
560 if (md_get_disk_info(fd, &disk) != 0)
561 continue;
562 if (disk.major == 0 && disk.minor == 0)
563 continue;
564 remaining_disks--;
565 if (disp == 'f' && (disk.state & 1) != 0) /* already faulty */
566 continue;
567 sprintf(buf, "%d:%d", disk.major, disk.minor);
568 sfd = dev_open(buf, O_RDONLY);
569 if (sfd >= 0) {
570 /* Not detached */
571 close(sfd);
572 continue;
573 }
574 if (errno != ENXIO)
575 /* Probably not detached */
576 continue;
577 dv = add_one(dv, buf, disp);
578 }
579 }
580
581 static void add_set(struct mddev_dev *dv, int fd, char set_char)
582 {
583 mdu_array_info_t array;
584 mdu_disk_info_t disk;
585 int remaining_disks;
586 int copies, set;
587 int i;
588
589 if (md_get_array_info(fd, &array) != 0)
590 return;
591 if (array.level != 10)
592 return;
593 copies = ((array.layout & 0xff) *
594 ((array.layout >> 8) & 0xff));
595 if (array.raid_disks % copies)
596 return;
597
598 remaining_disks = array.nr_disks;
599 for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
600 char buf[40];
601 disk.number = i;
602 if (md_get_disk_info(fd, &disk) != 0)
603 continue;
604 if (disk.major == 0 && disk.minor == 0)
605 continue;
606 remaining_disks--;
607 set = disk.raid_disk % copies;
608 if (set_char != set + 'A')
609 continue;
610 sprintf(buf, "%d:%d", disk.major, disk.minor);
611 dv = add_one(dv, buf, dv->disposition);
612 }
613 }
614
615 int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
616 struct supertype *dev_st, struct supertype *tst,
617 unsigned long rdev,
618 char *update, char *devname, int verbose,
619 mdu_array_info_t *array)
620 {
621 struct mdinfo mdi;
622 int duuid[4];
623 int ouuid[4];
624
625 dev_st->ss->getinfo_super(dev_st, &mdi, NULL);
626 dev_st->ss->uuid_from_super(dev_st, ouuid);
627 if (tst->sb)
628 tst->ss->uuid_from_super(tst, duuid);
629 else
630 /* Assume uuid matches: kernel will check */
631 memcpy(duuid, ouuid, sizeof(ouuid));
632 if ((mdi.disk.state & (1<<MD_DISK_ACTIVE)) &&
633 !(mdi.disk.state & (1<<MD_DISK_FAULTY)) &&
634 memcmp(duuid, ouuid, sizeof(ouuid))==0) {
635 /* Looks like it is worth a
636 * try. Need to make sure
637 * kernel will accept it
638 * though.
639 */
640 mdu_disk_info_t disc;
641 /* re-add doesn't work for version-1 superblocks
642 * before 2.6.18 :-(
643 */
644 if (array->major_version == 1 &&
645 get_linux_version() <= 2006018)
646 goto skip_re_add;
647 disc.number = mdi.disk.number;
648 if (md_get_disk_info(fd, &disc) != 0 ||
649 disc.major != 0 || disc.minor != 0)
650 goto skip_re_add;
651 disc.major = major(rdev);
652 disc.minor = minor(rdev);
653 disc.number = mdi.disk.number;
654 disc.raid_disk = mdi.disk.raid_disk;
655 disc.state = mdi.disk.state;
656 if (array->state & (1 << MD_SB_CLUSTERED)) {
657 /* extra flags are needed when adding to a cluster as
658 * there are two cases to distinguish
659 */
660 if (dv->disposition == 'c')
661 disc.state |= (1 << MD_DISK_CANDIDATE);
662 else
663 disc.state |= (1 << MD_DISK_CLUSTER_ADD);
664 }
665 if (dv->writemostly == FlagSet)
666 disc.state |= 1 << MD_DISK_WRITEMOSTLY;
667 if (dv->writemostly == FlagClear)
668 disc.state &= ~(1 << MD_DISK_WRITEMOSTLY);
669 if (dv->failfast == FlagSet)
670 disc.state |= 1 << MD_DISK_FAILFAST;
671 if (dv->failfast == FlagClear)
672 disc.state &= ~(1 << MD_DISK_FAILFAST);
673 remove_partitions(tfd);
674 if (update || dv->writemostly != FlagDefault
675 || dv->failfast != FlagDefault) {
676 int rv = -1;
677 tfd = dev_open(dv->devname, O_RDWR);
678 if (tfd < 0) {
679 pr_err("failed to open %s for superblock update during re-add\n", dv->devname);
680 return -1;
681 }
682
683 if (dv->writemostly == FlagSet)
684 rv = dev_st->ss->update_super(
685 dev_st, NULL, "writemostly",
686 devname, verbose, 0, NULL);
687 if (dv->writemostly == FlagClear)
688 rv = dev_st->ss->update_super(
689 dev_st, NULL, "readwrite",
690 devname, verbose, 0, NULL);
691 if (dv->failfast == FlagSet)
692 rv = dev_st->ss->update_super(
693 dev_st, NULL, "failfast",
694 devname, verbose, 0, NULL);
695 if (dv->failfast == FlagClear)
696 rv = dev_st->ss->update_super(
697 dev_st, NULL, "nofailfast",
698 devname, verbose, 0, NULL);
699 if (update)
700 rv = dev_st->ss->update_super(
701 dev_st, NULL, update,
702 devname, verbose, 0, NULL);
703 if (rv == 0)
704 rv = dev_st->ss->store_super(dev_st, tfd);
705 close(tfd);
706 if (rv != 0) {
707 pr_err("failed to update superblock during re-add\n");
708 return -1;
709 }
710 }
711 /* don't even try if disk is marked as faulty */
712 errno = 0;
713 if (ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
714 if (verbose >= 0)
715 pr_err("re-added %s\n", dv->devname);
716 return 1;
717 }
718 if (errno == ENOMEM || errno == EROFS) {
719 pr_err("add new device failed for %s: %s\n",
720 dv->devname, strerror(errno));
721 if (dv->disposition == 'M')
722 return 0;
723 return -1;
724 }
725 }
726 skip_re_add:
727 return 0;
728 }
729
730 int Manage_add(int fd, int tfd, struct mddev_dev *dv,
731 struct supertype *tst, mdu_array_info_t *array,
732 int force, int verbose, char *devname,
733 char *update, unsigned long rdev, unsigned long long array_size,
734 int raid_slot)
735 {
736 unsigned long long ldsize;
737 struct supertype *dev_st;
738 int j;
739 mdu_disk_info_t disc;
740
741 if (!get_dev_size(tfd, dv->devname, &ldsize)) {
742 if (dv->disposition == 'M')
743 return 0;
744 else
745 return -1;
746 }
747
748 if (tst->ss == &super0 && ldsize > 4ULL*1024*1024*1024*1024) {
749 /* More than 4TB is wasted on v0.90 */
750 if (!force) {
751 pr_err("%s is larger than %s can effectively use.\n"
752 " Add --force is you really want to add this device.\n",
753 dv->devname, devname);
754 return -1;
755 }
756 pr_err("%s is larger than %s can effectively use.\n"
757 " Adding anyway as --force was given.\n",
758 dv->devname, devname);
759 }
760 if (!tst->ss->external && array->major_version == 0) {
761 if (ioctl(fd, HOT_ADD_DISK, rdev)==0) {
762 if (verbose >= 0)
763 pr_err("hot added %s\n",
764 dv->devname);
765 return 1;
766 }
767
768 pr_err("hot add failed for %s: %s\n",
769 dv->devname, strerror(errno));
770 return -1;
771 }
772
773 if (array->not_persistent == 0 || tst->ss->external) {
774
775 /* need to find a sample superblock to copy, and
776 * a spare slot to use.
777 * For 'external' array (well, container based),
778 * We can just load the metadata for the array->
779 */
780 int array_failed;
781 if (tst->sb)
782 /* already loaded */;
783 else if (tst->ss->external) {
784 tst->ss->load_container(tst, fd, NULL);
785 } else for (j = 0; j < tst->max_devs; j++) {
786 char *dev;
787 int dfd;
788 disc.number = j;
789 if (md_get_disk_info(fd, &disc))
790 continue;
791 if (disc.major==0 && disc.minor==0)
792 continue;
793 if ((disc.state & 4)==0) /* sync */
794 continue;
795 /* Looks like a good device to try */
796 dev = map_dev(disc.major, disc.minor, 1);
797 if (!dev)
798 continue;
799 dfd = dev_open(dev, O_RDONLY);
800 if (dfd < 0)
801 continue;
802 if (tst->ss->load_super(tst, dfd,
803 NULL)) {
804 close(dfd);
805 continue;
806 }
807 close(dfd);
808 break;
809 }
810 /* FIXME this is a bad test to be using */
811 if (!tst->sb && (dv->disposition != 'a'
812 && dv->disposition != 'S')) {
813 /* we are re-adding a device to a
814 * completely dead array - have to depend
815 * on kernel to check
816 */
817 } else if (!tst->sb) {
818 pr_err("cannot load array metadata from %s\n", devname);
819 return -1;
820 }
821
822 /* Make sure device is large enough */
823 if (dv->disposition != 'j' && /* skip size check for Journal */
824 tst->sb &&
825 tst->ss->avail_size(tst, ldsize/512, INVALID_SECTORS) <
826 array_size) {
827 if (dv->disposition == 'M')
828 return 0;
829 pr_err("%s not large enough to join array\n",
830 dv->devname);
831 return -1;
832 }
833
834 /* Possibly this device was recently part of
835 * the array and was temporarily removed, and
836 * is now being re-added. If so, we can
837 * simply re-add it.
838 */
839
840 if (array->not_persistent == 0) {
841 dev_st = dup_super(tst);
842 dev_st->ss->load_super(dev_st, tfd, NULL);
843 if (dev_st->sb && dv->disposition != 'S') {
844 int rv;
845
846 rv = attempt_re_add(fd, tfd, dv, dev_st, tst,
847 rdev, update, devname,
848 verbose, array);
849 dev_st->ss->free_super(dev_st);
850 if (rv)
851 return rv;
852 }
853 }
854 if (dv->disposition == 'M') {
855 if (verbose > 0)
856 pr_err("--re-add for %s to %s is not possible\n",
857 dv->devname, devname);
858 return 0;
859 }
860 if (dv->disposition == 'A') {
861 pr_err("--re-add for %s to %s is not possible\n",
862 dv->devname, devname);
863 return -1;
864 }
865 if (array->active_disks < array->raid_disks) {
866 char *avail = xcalloc(array->raid_disks, 1);
867 int d;
868 int found = 0;
869
870 for (d = 0; d < MAX_DISKS && found < array->nr_disks; d++) {
871 disc.number = d;
872 if (md_get_disk_info(fd, &disc))
873 continue;
874 if (disc.major == 0 && disc.minor == 0)
875 continue;
876 if (!(disc.state & (1<<MD_DISK_SYNC)))
877 continue;
878 avail[disc.raid_disk] = 1;
879 found++;
880 }
881 array_failed = !enough(array->level, array->raid_disks,
882 array->layout, 1, avail);
883 free(avail);
884 } else
885 array_failed = 0;
886 if (array_failed) {
887 pr_err("%s has failed so using --add cannot work and might destroy\n",
888 devname);
889 pr_err("data on %s. You should stop the array and re-assemble it.\n",
890 dv->devname);
891 return -1;
892 }
893 } else {
894 /* non-persistent. Must ensure that new drive
895 * is at least array->size big.
896 */
897 if (ldsize/512 < array_size) {
898 pr_err("%s not large enough to join array\n",
899 dv->devname);
900 return -1;
901 }
902 }
903 /* committed to really trying this device now*/
904 remove_partitions(tfd);
905
906 /* in 2.6.17 and earlier, version-1 superblocks won't
907 * use the number we write, but will choose a free number.
908 * we must choose the same free number, which requires
909 * starting at 'raid_disks' and counting up
910 */
911 for (j = array->raid_disks; j < tst->max_devs; j++) {
912 disc.number = j;
913 if (md_get_disk_info(fd, &disc))
914 break;
915 if (disc.major==0 && disc.minor==0)
916 break;
917 if (disc.state & 8) /* removed */
918 break;
919 }
920 disc.major = major(rdev);
921 disc.minor = minor(rdev);
922 if (raid_slot < 0)
923 disc.number = j;
924 else
925 disc.number = raid_slot;
926 disc.state = 0;
927
928 /* only add journal to array that supports journaling */
929 if (dv->disposition == 'j') {
930 struct mdinfo mdi;
931 struct mdinfo *mdp;
932
933 mdp = sysfs_read(fd, NULL, GET_ARRAY_STATE);
934 if (!mdp) {
935 pr_err("%s unable to read array state.\n", devname);
936 return -1;
937 }
938
939 if (strncmp(mdp->sysfs_array_state, "readonly", 8) != 0) {
940 sysfs_free(mdp);
941 pr_err("%s is not readonly, cannot add journal.\n", devname);
942 return -1;
943 }
944
945 sysfs_free(mdp);
946
947 tst->ss->getinfo_super(tst, &mdi, NULL);
948 if (mdi.journal_device_required == 0) {
949 pr_err("%s does not support journal device.\n", devname);
950 return -1;
951 }
952 disc.raid_disk = 0;
953 }
954
955 if (array->not_persistent==0) {
956 int dfd;
957 if (dv->disposition == 'j')
958 disc.state |= (1 << MD_DISK_JOURNAL) | (1 << MD_DISK_SYNC);
959 if (dv->writemostly == FlagSet)
960 disc.state |= 1 << MD_DISK_WRITEMOSTLY;
961 if (dv->failfast == FlagSet)
962 disc.state |= 1 << MD_DISK_FAILFAST;
963 dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
964 if (tst->ss->add_to_super(tst, &disc, dfd,
965 dv->devname, INVALID_SECTORS))
966 return -1;
967 if (tst->ss->write_init_super(tst))
968 return -1;
969 } else if (dv->disposition == 'A') {
970 /* this had better be raid1.
971 * As we are "--re-add"ing we must find a spare slot
972 * to fill.
973 */
974 char *used = xcalloc(array->raid_disks, 1);
975 for (j = 0; j < tst->max_devs; j++) {
976 mdu_disk_info_t disc2;
977 disc2.number = j;
978 if (md_get_disk_info(fd, &disc2))
979 continue;
980 if (disc2.major==0 && disc2.minor==0)
981 continue;
982 if (disc2.state & 8) /* removed */
983 continue;
984 if (disc2.raid_disk < 0)
985 continue;
986 if (disc2.raid_disk > array->raid_disks)
987 continue;
988 used[disc2.raid_disk] = 1;
989 }
990 for (j = 0 ; j < array->raid_disks; j++)
991 if (!used[j]) {
992 disc.raid_disk = j;
993 disc.state |= (1<<MD_DISK_SYNC);
994 break;
995 }
996 free(used);
997 }
998
999 if (array->state & (1 << MD_SB_CLUSTERED)) {
1000 if (dv->disposition == 'c')
1001 disc.state |= (1 << MD_DISK_CANDIDATE);
1002 else
1003 disc.state |= (1 << MD_DISK_CLUSTER_ADD);
1004 }
1005
1006 if (dv->writemostly == FlagSet)
1007 disc.state |= (1 << MD_DISK_WRITEMOSTLY);
1008 if (dv->failfast == FlagSet)
1009 disc.state |= (1 << MD_DISK_FAILFAST);
1010 if (tst->ss->external) {
1011 /* add a disk
1012 * to an external metadata container */
1013 struct mdinfo new_mdi;
1014 struct mdinfo *sra;
1015 int container_fd;
1016 char devnm[32];
1017 int dfd;
1018
1019 strcpy(devnm, fd2devnm(fd));
1020
1021 container_fd = open_dev_excl(devnm);
1022 if (container_fd < 0) {
1023 pr_err("add failed for %s: could not get exclusive access to container\n",
1024 dv->devname);
1025 tst->ss->free_super(tst);
1026 return -1;
1027 }
1028
1029 Kill(dv->devname, NULL, 0, -1, 0);
1030 dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
1031 if (mdmon_running(tst->container_devnm))
1032 tst->update_tail = &tst->updates;
1033 if (tst->ss->add_to_super(tst, &disc, dfd,
1034 dv->devname, INVALID_SECTORS)) {
1035 close(dfd);
1036 close(container_fd);
1037 return -1;
1038 }
1039 if (tst->update_tail)
1040 flush_metadata_updates(tst);
1041 else
1042 tst->ss->sync_metadata(tst);
1043
1044 sra = sysfs_read(container_fd, NULL, 0);
1045 if (!sra) {
1046 pr_err("add failed for %s: sysfs_read failed\n",
1047 dv->devname);
1048 close(container_fd);
1049 tst->ss->free_super(tst);
1050 return -1;
1051 }
1052 sra->array.level = LEVEL_CONTAINER;
1053 /* Need to set data_offset and component_size */
1054 tst->ss->getinfo_super(tst, &new_mdi, NULL);
1055 new_mdi.disk.major = disc.major;
1056 new_mdi.disk.minor = disc.minor;
1057 new_mdi.recovery_start = 0;
1058 /* Make sure fds are closed as they are O_EXCL which
1059 * would block add_disk */
1060 tst->ss->free_super(tst);
1061 if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
1062 pr_err("add new device to external metadata failed for %s\n", dv->devname);
1063 close(container_fd);
1064 sysfs_free(sra);
1065 return -1;
1066 }
1067 ping_monitor(devnm);
1068 sysfs_free(sra);
1069 close(container_fd);
1070 } else {
1071 tst->ss->free_super(tst);
1072 if (ioctl(fd, ADD_NEW_DISK, &disc)) {
1073 if (dv->disposition == 'j')
1074 pr_err("Failed to hot add %s as journal, "
1075 "please try restart %s.\n", dv->devname, devname);
1076 else
1077 pr_err("add new device failed for %s as %d: %s\n",
1078 dv->devname, j, strerror(errno));
1079 return -1;
1080 }
1081 if (dv->disposition == 'j') {
1082 pr_err("Journal added successfully, making %s read-write\n", devname);
1083 if (Manage_ro(devname, fd, -1))
1084 pr_err("Failed to make %s read-write\n", devname);
1085 }
1086
1087 }
1088 if (verbose >= 0)
1089 pr_err("added %s\n", dv->devname);
1090 return 1;
1091 }
1092
1093 int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
1094 int sysfd, unsigned long rdev, int force, int verbose, char *devname)
1095 {
1096 int lfd = -1;
1097 int err;
1098
1099 if (tst->ss->external) {
1100 /* To remove a device from a container, we must
1101 * check that it isn't in use in an array.
1102 * This involves looking in the 'holders'
1103 * directory - there must be just one entry,
1104 * the container.
1105 * To ensure that it doesn't get used as a
1106 * hot spare while we are checking, we
1107 * get an O_EXCL open on the container
1108 */
1109 int ret;
1110 char devnm[32];
1111 strcpy(devnm, fd2devnm(fd));
1112 lfd = open_dev_excl(devnm);
1113 if (lfd < 0) {
1114 pr_err("Cannot get exclusive access to container - odd\n");
1115 return -1;
1116 }
1117 /* We may not be able to check on holders in
1118 * sysfs, either because we don't have the dev num
1119 * (rdev == 0) or because the device has been detached
1120 * and the 'holders' directory no longer exists
1121 * (ret == -1). In that case, assume it is OK to
1122 * remove.
1123 */
1124 if (rdev == 0)
1125 ret = -1;
1126 else {
1127 /*
1128 * The drive has already been set to 'faulty', however
1129 * monitor might not have had time to process it and the
1130 * drive might still have an entry in the 'holders'
1131 * directory. Try a few times to avoid a false error
1132 */
1133 int count = 20;
1134
1135 do {
1136 ret = sysfs_unique_holder(devnm, rdev);
1137 if (ret < 2)
1138 break;
1139 usleep(100 * 1000); /* 100ms */
1140 } while (--count > 0);
1141
1142 if (ret == 0) {
1143 pr_err("%s is not a member, cannot remove.\n",
1144 dv->devname);
1145 close(lfd);
1146 return -1;
1147 }
1148 if (ret >= 2) {
1149 pr_err("%s is still in use, cannot remove.\n",
1150 dv->devname);
1151 close(lfd);
1152 return -1;
1153 }
1154 }
1155 }
1156 /* FIXME check that it is a current member */
1157 if (sysfd >= 0) {
1158 /* device has been removed and we don't know
1159 * the major:minor number
1160 */
1161 err = sys_hot_remove_disk(sysfd, force);
1162 } else {
1163 err = hot_remove_disk(fd, rdev, force);
1164 if (err && errno == ENODEV) {
1165 /* Old kernels rejected this if no personality
1166 * is registered */
1167 struct mdinfo *sra = sysfs_read(fd, NULL, GET_DEVS);
1168 struct mdinfo *dv = NULL;
1169 if (sra)
1170 dv = sra->devs;
1171 for ( ; dv ; dv=dv->next)
1172 if (dv->disk.major == (int)major(rdev) &&
1173 dv->disk.minor == (int)minor(rdev))
1174 break;
1175 if (dv)
1176 err = sysfs_set_str(sra, dv,
1177 "state", "remove");
1178 else
1179 err = -1;
1180 sysfs_free(sra);
1181 }
1182 }
1183 if (err) {
1184 pr_err("hot remove failed for %s: %s\n", dv->devname,
1185 strerror(errno));
1186 if (lfd >= 0)
1187 close(lfd);
1188 return -1;
1189 }
1190 if (tst->ss->external) {
1191 /*
1192 * Before dropping our exclusive open we make an
1193 * attempt at preventing mdmon from seeing an
1194 * 'add' event before reconciling this 'remove'
1195 * event.
1196 */
1197 char *devnm = fd2devnm(fd);
1198
1199 if (!devnm) {
1200 pr_err("unable to get container name\n");
1201 return -1;
1202 }
1203
1204 ping_manager(devnm);
1205 }
1206 if (lfd >= 0)
1207 close(lfd);
1208 if (verbose >= 0)
1209 pr_err("hot removed %s from %s\n",
1210 dv->devname, devname);
1211 return 1;
1212 }
1213
1214 int Manage_replace(struct supertype *tst, int fd, struct mddev_dev *dv,
1215 unsigned long rdev, int verbose, char *devname)
1216 {
1217 struct mdinfo *mdi, *di;
1218 if (tst->ss->external) {
1219 pr_err("--replace only supported for native metadata (0.90 or 1.x)\n");
1220 return -1;
1221 }
1222 /* Need to find the device in sysfs and add 'want_replacement' to the
1223 * status.
1224 */
1225 mdi = sysfs_read(fd, NULL, GET_DEVS);
1226 if (!mdi || !mdi->devs) {
1227 pr_err("Cannot find status of %s to enable replacement - strange\n",
1228 devname);
1229 return -1;
1230 }
1231 for (di = mdi->devs; di; di = di->next)
1232 if (di->disk.major == (int)major(rdev) &&
1233 di->disk.minor == (int)minor(rdev))
1234 break;
1235 if (di) {
1236 int rv;
1237 if (di->disk.raid_disk < 0) {
1238 pr_err("%s is not active and so cannot be replaced.\n",
1239 dv->devname);
1240 sysfs_free(mdi);
1241 return -1;
1242 }
1243 rv = sysfs_set_str(mdi, di,
1244 "state", "want_replacement");
1245 if (rv) {
1246 sysfs_free(mdi);
1247 pr_err("Failed to request replacement for %s\n",
1248 dv->devname);
1249 return -1;
1250 }
1251 if (verbose >= 0)
1252 pr_err("Marked %s (device %d in %s) for replacement\n",
1253 dv->devname, di->disk.raid_disk, devname);
1254 /* If there is a matching 'with', we need to tell it which
1255 * raid disk
1256 */
1257 while (dv && dv->disposition != 'W')
1258 dv = dv->next;
1259 if (dv) {
1260 dv->disposition = 'w';
1261 dv->used = di->disk.raid_disk;
1262 }
1263 return 1;
1264 }
1265 sysfs_free(mdi);
1266 pr_err("%s not found in %s so cannot --replace it\n",
1267 dv->devname, devname);
1268 return -1;
1269 }
1270
1271 int Manage_with(struct supertype *tst, int fd, struct mddev_dev *dv,
1272 unsigned long rdev, int verbose, char *devname)
1273 {
1274 struct mdinfo *mdi, *di;
1275 /* try to set 'slot' for 'rdev' in 'fd' to 'dv->used' */
1276 mdi = sysfs_read(fd, NULL, GET_DEVS|GET_STATE);
1277 if (!mdi || !mdi->devs) {
1278 pr_err("Cannot find status of %s to enable replacement - strange\n",
1279 devname);
1280 return -1;
1281 }
1282 for (di = mdi->devs; di; di = di->next)
1283 if (di->disk.major == (int)major(rdev) &&
1284 di->disk.minor == (int)minor(rdev))
1285 break;
1286 if (di) {
1287 int rv;
1288 if (di->disk.state & (1<<MD_DISK_FAULTY)) {
1289 pr_err("%s is faulty and cannot be a replacement\n",
1290 dv->devname);
1291 sysfs_free(mdi);
1292 return -1;
1293 }
1294 if (di->disk.raid_disk >= 0) {
1295 pr_err("%s is active and cannot be a replacement\n",
1296 dv->devname);
1297 sysfs_free(mdi);
1298 return -1;
1299 }
1300 rv = sysfs_set_num(mdi, di,
1301 "slot", dv->used);
1302 if (rv) {
1303 sysfs_free(mdi);
1304 pr_err("Failed to set %s as preferred replacement.\n",
1305 dv->devname);
1306 return -1;
1307 }
1308 if (verbose >= 0)
1309 pr_err("Marked %s in %s as replacement for device %d\n",
1310 dv->devname, devname, dv->used);
1311 return 1;
1312 }
1313 sysfs_free(mdi);
1314 pr_err("%s not found in %s so cannot make it preferred replacement\n",
1315 dv->devname, devname);
1316 return -1;
1317 }
1318
1319 int Manage_subdevs(char *devname, int fd,
1320 struct mddev_dev *devlist, int verbose, int test,
1321 char *update, int force)
1322 {
1323 /* Do something to each dev.
1324 * devmode can be
1325 * 'a' - add the device
1326 * try HOT_ADD_DISK
1327 * If that fails EINVAL, try ADD_NEW_DISK
1328 * 'S' - add the device as a spare - don't try re-add
1329 * 'j' - add the device as a journal device
1330 * 'A' - re-add the device
1331 * 'r' - remove the device: HOT_REMOVE_DISK
1332 * device can be 'faulty' or 'detached' in which case all
1333 * matching devices are removed.
1334 * 'f' - set the device faulty SET_DISK_FAULTY
1335 * device can be 'detached' in which case any device that
1336 * is inaccessible will be marked faulty.
1337 * 'R' - mark this device as wanting replacement.
1338 * 'W' - this device is added if necessary and activated as
1339 * a replacement for a previous 'R' device.
1340 * -----
1341 * 'w' - 'W' will be changed to 'w' when it is paired with
1342 * a 'R' device. If a 'W' is found while walking the list
1343 * it must be unpaired, and is an error.
1344 * 'M' - this is created by a 'missing' target. It is a slight
1345 * variant on 'A'
1346 * 'F' - Another variant of 'A', where the device was faulty
1347 * so must be removed from the array first.
1348 * 'c' - confirm the device as found (for clustered environments)
1349 *
1350 * For 'f' and 'r', the device can also be a kernel-internal
1351 * name such as 'sdb'.
1352 */
1353 mdu_array_info_t array;
1354 unsigned long long array_size;
1355 struct mddev_dev *dv;
1356 int tfd = -1;
1357 struct supertype *tst;
1358 char *subarray = NULL;
1359 int sysfd = -1;
1360 int count = 0; /* number of actions taken */
1361 struct mdinfo info;
1362 struct mdinfo devinfo;
1363 int frozen = 0;
1364 int busy = 0;
1365 int raid_slot = -1;
1366
1367 if (sysfs_init(&info, fd, NULL)) {
1368 pr_err("sysfs not availabile for %s\n", devname);
1369 goto abort;
1370 }
1371
1372 if (md_get_array_info(fd, &array)) {
1373 pr_err("Cannot get array info for %s\n", devname);
1374 goto abort;
1375 }
1376 /* array.size is only 32 bits and may be truncated.
1377 * So read from sysfs if possible, and record number of sectors
1378 */
1379
1380 array_size = get_component_size(fd);
1381 if (array_size <= 0)
1382 array_size = array.size * 2;
1383
1384 tst = super_by_fd(fd, &subarray);
1385 if (!tst) {
1386 pr_err("unsupport array - version %d.%d\n",
1387 array.major_version, array.minor_version);
1388 goto abort;
1389 }
1390
1391 for (dv = devlist; dv; dv = dv->next) {
1392 unsigned long rdev = 0; /* device to add/remove etc */
1393 int rv;
1394 int mj,mn;
1395
1396 raid_slot = -1;
1397 if (dv->disposition == 'c') {
1398 rv = parse_cluster_confirm_arg(dv->devname,
1399 &dv->devname,
1400 &raid_slot);
1401 if (rv) {
1402 pr_err("Could not get the devname of cluster\n");
1403 goto abort;
1404 }
1405 }
1406
1407 if (strcmp(dv->devname, "failed") == 0 ||
1408 strcmp(dv->devname, "faulty") == 0) {
1409 if (dv->disposition != 'A'
1410 && dv->disposition != 'r') {
1411 pr_err("%s only meaningful with -r or --re-add, not -%c\n",
1412 dv->devname, dv->disposition);
1413 goto abort;
1414 }
1415 add_faulty(dv, fd, (dv->disposition == 'A'
1416 ? 'F' : 'r'));
1417 continue;
1418 }
1419 if (strcmp(dv->devname, "detached") == 0) {
1420 if (dv->disposition != 'r' && dv->disposition != 'f') {
1421 pr_err("%s only meaningful with -r of -f, not -%c\n",
1422 dv->devname, dv->disposition);
1423 goto abort;
1424 }
1425 add_detached(dv, fd, dv->disposition);
1426 continue;
1427 }
1428
1429 if (strcmp(dv->devname, "missing") == 0) {
1430 struct mddev_dev *add_devlist;
1431 struct mddev_dev **dp;
1432 if (dv->disposition == 'c') {
1433 rv = ioctl(fd, CLUSTERED_DISK_NACK, NULL);
1434 break;
1435 }
1436
1437 if (dv->disposition != 'A') {
1438 pr_err("'missing' only meaningful with --re-add\n");
1439 goto abort;
1440 }
1441 add_devlist = conf_get_devs();
1442 if (add_devlist == NULL) {
1443 pr_err("no devices to scan for missing members.");
1444 continue;
1445 }
1446 for (dp = &add_devlist; *dp; dp = & (*dp)->next)
1447 /* 'M' (for 'missing') is like 'A' without errors */
1448 (*dp)->disposition = 'M';
1449 *dp = dv->next;
1450 dv->next = add_devlist;
1451 continue;
1452 }
1453
1454 if (strncmp(dv->devname, "set-", 4) == 0 &&
1455 strlen(dv->devname) == 5) {
1456 int copies;
1457
1458 if (dv->disposition != 'r' &&
1459 dv->disposition != 'f') {
1460 pr_err("'%s' only meaningful with -r or -f\n",
1461 dv->devname);
1462 goto abort;
1463 }
1464 if (array.level != 10) {
1465 pr_err("'%s' only meaningful with RAID10 arrays\n",
1466 dv->devname);
1467 goto abort;
1468 }
1469 copies = ((array.layout & 0xff) *
1470 ((array.layout >> 8) & 0xff));
1471 if (array.raid_disks % copies != 0 ||
1472 dv->devname[4] < 'A' ||
1473 dv->devname[4] >= 'A' + copies ||
1474 copies > 26) {
1475 pr_err("'%s' not meaningful with this array\n",
1476 dv->devname);
1477 goto abort;
1478 }
1479 add_set(dv, fd, dv->devname[4]);
1480 continue;
1481 }
1482
1483 if (strchr(dv->devname, '/') == NULL &&
1484 strchr(dv->devname, ':') == NULL &&
1485 strlen(dv->devname) < 50) {
1486 /* Assume this is a kernel-internal name like 'sda1' */
1487 int found = 0;
1488 char dname[55];
1489 if (dv->disposition != 'r' && dv->disposition != 'f') {
1490 pr_err("%s only meaningful with -r or -f, not -%c\n",
1491 dv->devname, dv->disposition);
1492 goto abort;
1493 }
1494
1495 sprintf(dname, "dev-%s", dv->devname);
1496 sysfd = sysfs_open(fd2devnm(fd), dname, "block/dev");
1497 if (sysfd >= 0) {
1498 char dn[20];
1499 if (sysfs_fd_get_str(sysfd, dn, 20) > 0 &&
1500 sscanf(dn, "%d:%d", &mj,&mn) == 2) {
1501 rdev = makedev(mj,mn);
1502 found = 1;
1503 }
1504 close(sysfd);
1505 sysfd = -1;
1506 }
1507 if (!found) {
1508 sysfd = sysfs_open(fd2devnm(fd), dname, "state");
1509 if (sysfd < 0) {
1510 pr_err("%s does not appear to be a component of %s\n",
1511 dv->devname, devname);
1512 goto abort;
1513 }
1514 }
1515 } else if ((dv->disposition == 'r' || dv->disposition == 'f')
1516 && get_maj_min(dv->devname, &mj, &mn)) {
1517 /* for 'fail' and 'remove', the device might
1518 * not exist.
1519 */
1520 rdev = makedev(mj, mn);
1521 } else {
1522 struct stat stb;
1523 tfd = dev_open(dv->devname, O_RDONLY);
1524 if (tfd >= 0) {
1525 fstat(tfd, &stb);
1526 close(tfd);
1527 } else {
1528 int open_err = errno;
1529 if (stat(dv->devname, &stb) != 0) {
1530 pr_err("Cannot find %s: %s\n",
1531 dv->devname, strerror(errno));
1532 goto abort;
1533 }
1534 if ((stb.st_mode & S_IFMT) != S_IFBLK) {
1535 if (dv->disposition == 'M')
1536 /* non-fatal. Also improbable */
1537 continue;
1538 pr_err("%s is not a block device.\n",
1539 dv->devname);
1540 goto abort;
1541 }
1542 if (dv->disposition == 'r')
1543 /* Be happy, the stat worked, that is
1544 * enough for --remove
1545 */
1546 ;
1547 else {
1548 if (dv->disposition == 'M')
1549 /* non-fatal */
1550 continue;
1551 pr_err("Cannot open %s: %s\n",
1552 dv->devname, strerror(open_err));
1553 goto abort;
1554 }
1555 }
1556 rdev = stb.st_rdev;
1557 }
1558 switch(dv->disposition){
1559 default:
1560 pr_err("internal error - devmode[%s]=%d\n",
1561 dv->devname, dv->disposition);
1562 goto abort;
1563 case 'a':
1564 case 'S': /* --add-spare */
1565 case 'j': /* --add-journal */
1566 case 'A':
1567 case 'M': /* --re-add missing */
1568 case 'F': /* --re-add faulty */
1569 case 'c': /* --cluster-confirm */
1570 /* add the device */
1571 if (subarray) {
1572 pr_err("Cannot add disks to a \'member\' array, perform this operation on the parent container\n");
1573 goto abort;
1574 }
1575
1576 /* Let's first try to write re-add to sysfs */
1577 if (rdev != 0 &&
1578 (dv->disposition == 'A' || dv->disposition == 'F')) {
1579 sysfs_init_dev(&devinfo, rdev);
1580 if (sysfs_set_str(&info, &devinfo, "state", "re-add") == 0) {
1581 pr_err("re-add %s to %s succeed\n",
1582 dv->devname, info.sys_name);
1583 break;
1584 }
1585 }
1586
1587 if (dv->disposition == 'F')
1588 /* Need to remove first */
1589 hot_remove_disk(fd, rdev, force);
1590 /* Make sure it isn't in use (in 2.6 or later) */
1591 tfd = dev_open(dv->devname, O_RDONLY|O_EXCL);
1592 if (tfd >= 0) {
1593 /* We know no-one else is using it. We'll
1594 * need non-exclusive access to add it, so
1595 * do that now.
1596 */
1597 close(tfd);
1598 tfd = dev_open(dv->devname, O_RDONLY);
1599 }
1600 if (tfd < 0) {
1601 if (dv->disposition == 'M')
1602 continue;
1603 pr_err("Cannot open %s: %s\n",
1604 dv->devname, strerror(errno));
1605 goto abort;
1606 }
1607 if (!frozen) {
1608 if (sysfs_freeze_array(&info) == 1)
1609 frozen = 1;
1610 else
1611 frozen = -1;
1612 }
1613 rv = Manage_add(fd, tfd, dv, tst, &array,
1614 force, verbose, devname, update,
1615 rdev, array_size, raid_slot);
1616 close(tfd);
1617 tfd = -1;
1618 if (rv < 0)
1619 goto abort;
1620 if (rv > 0)
1621 count++;
1622 break;
1623
1624 case 'r':
1625 /* hot remove */
1626 if (subarray) {
1627 pr_err("Cannot remove disks from a \'member\' array, perform this operation on the parent container\n");
1628 rv = -1;
1629 } else
1630 rv = Manage_remove(tst, fd, dv, sysfd,
1631 rdev, verbose, force,
1632 devname);
1633 if (sysfd >= 0)
1634 close(sysfd);
1635 sysfd = -1;
1636 if (rv < 0)
1637 goto abort;
1638 if (rv > 0)
1639 count++;
1640 break;
1641
1642 case 'f': /* set faulty */
1643 /* FIXME check current member */
1644 if ((sysfd >= 0 && write(sysfd, "faulty", 6) != 6) ||
1645 (sysfd < 0 && ioctl(fd, SET_DISK_FAULTY,
1646 rdev))) {
1647 if (errno == EBUSY)
1648 busy = 1;
1649 pr_err("set device faulty failed for %s: %s\n",
1650 dv->devname, strerror(errno));
1651 if (sysfd >= 0)
1652 close(sysfd);
1653 goto abort;
1654 }
1655 if (sysfd >= 0)
1656 close(sysfd);
1657 sysfd = -1;
1658 count++;
1659 if (verbose >= 0)
1660 pr_err("set %s faulty in %s\n",
1661 dv->devname, devname);
1662 break;
1663 case 'R': /* Mark as replaceable */
1664 if (subarray) {
1665 pr_err("Cannot replace disks in a \'member\' array, perform this operation on the parent container\n");
1666 rv = -1;
1667 } else {
1668 if (!frozen) {
1669 if (sysfs_freeze_array(&info) == 1)
1670 frozen = 1;
1671 else
1672 frozen = -1;
1673 }
1674 rv = Manage_replace(tst, fd, dv,
1675 rdev, verbose,
1676 devname);
1677 }
1678 if (rv < 0)
1679 goto abort;
1680 if (rv > 0)
1681 count++;
1682 break;
1683 case 'W': /* --with device that doesn't match */
1684 pr_err("No matching --replace device for --with %s\n",
1685 dv->devname);
1686 goto abort;
1687 case 'w': /* --with device which was matched */
1688 rv = Manage_with(tst, fd, dv,
1689 rdev, verbose, devname);
1690 if (rv < 0)
1691 goto abort;
1692 break;
1693 }
1694 }
1695 if (frozen > 0)
1696 sysfs_set_str(&info, NULL, "sync_action","idle");
1697 if (test && count == 0)
1698 return 2;
1699 return 0;
1700
1701 abort:
1702 if (frozen > 0)
1703 sysfs_set_str(&info, NULL, "sync_action","idle");
1704 return !test && busy ? 2 : 1;
1705 }
1706
1707 int autodetect(void)
1708 {
1709 /* Open any md device, and issue the RAID_AUTORUN ioctl */
1710 int rv = 1;
1711 int fd = dev_open("9:0", O_RDONLY);
1712 if (fd >= 0) {
1713 if (ioctl(fd, RAID_AUTORUN, 0) == 0)
1714 rv = 0;
1715 close(fd);
1716 }
1717 return rv;
1718 }
1719
1720 int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident *ident, int verbose)
1721 {
1722 struct supertype supertype, *st = &supertype;
1723 int fd, rv = 2;
1724
1725 memset(st, 0, sizeof(*st));
1726
1727 fd = open_subarray(dev, subarray, st, verbose < 0);
1728 if (fd < 0)
1729 return 2;
1730
1731 if (!st->ss->update_subarray) {
1732 if (verbose >= 0)
1733 pr_err("Operation not supported for %s metadata\n",
1734 st->ss->name);
1735 goto free_super;
1736 }
1737
1738 if (mdmon_running(st->devnm))
1739 st->update_tail = &st->updates;
1740
1741 rv = st->ss->update_subarray(st, subarray, update, ident);
1742
1743 if (rv) {
1744 if (verbose >= 0)
1745 pr_err("Failed to update %s of subarray-%s in %s\n",
1746 update, subarray, dev);
1747 } else if (st->update_tail)
1748 flush_metadata_updates(st);
1749 else
1750 st->ss->sync_metadata(st);
1751
1752 if (rv == 0 && strcmp(update, "name") == 0 && verbose >= 0)
1753 pr_err("Updated subarray-%s name from %s, UUIDs may have changed\n",
1754 subarray, dev);
1755
1756 free_super:
1757 st->ss->free_super(st);
1758 close(fd);
1759
1760 return rv;
1761 }
1762
1763 /* Move spare from one array to another If adding to destination array fails
1764 * add back to original array.
1765 * Returns 1 on success, 0 on failure */
1766 int move_spare(char *from_devname, char *to_devname, dev_t devid)
1767 {
1768 struct mddev_dev devlist;
1769 char devname[20];
1770
1771 /* try to remove and add */
1772 int fd1 = open(to_devname, O_RDONLY);
1773 int fd2 = open(from_devname, O_RDONLY);
1774
1775 if (fd1 < 0 || fd2 < 0) {
1776 if (fd1>=0) close(fd1);
1777 if (fd2>=0) close(fd2);
1778 return 0;
1779 }
1780
1781 devlist.next = NULL;
1782 devlist.used = 0;
1783 devlist.writemostly = FlagDefault;
1784 devlist.failfast = FlagDefault;
1785 devlist.devname = devname;
1786 sprintf(devname, "%d:%d", major(devid), minor(devid));
1787
1788 devlist.disposition = 'r';
1789 if (Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0) == 0) {
1790 devlist.disposition = 'a';
1791 if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0, NULL, 0) == 0) {
1792 /* make sure manager is aware of changes */
1793 ping_manager(to_devname);
1794 ping_manager(from_devname);
1795 close(fd1);
1796 close(fd2);
1797 return 1;
1798 }
1799 else Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0);
1800 }
1801 close(fd1);
1802 close(fd2);
1803 return 0;
1804 }
1805 #endif