]> git.ipfire.org Git - thirdparty/mdadm.git/blob - Manage.c
af55266b7dcba3aeebb44ca1bb0a2e2543980444
[thirdparty/mdadm.git] / Manage.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2001-2013 Neil Brown <neilb@suse.de>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@suse.de>
23 */
24
25 #include "mdadm.h"
26 #include "md_u.h"
27 #include "md_p.h"
28 #include <ctype.h>
29
/*
 * ioctl request codes built with _IO() on MD_MAJOR (numbers 1, 2 and 3).
 * NOTE(review): none of these three macros is referenced in the part of
 * this file that was visible when this comment was written - confirm they
 * are still needed before relying on them.
 */
#define REGISTER_DEV _IO (MD_MAJOR, 1)
#define START_MD _IO (MD_MAJOR, 2)
#define STOP_MD _IO (MD_MAJOR, 3)
33
34 int Manage_ro(char *devname, int fd, int readonly)
35 {
36 /* switch to readonly or rw
37 *
38 * requires >= 0.90.0
39 * first check that array is runing
40 * use RESTART_ARRAY_RW or STOP_ARRAY_RO
41 *
42 */
43 struct mdinfo *mdi;
44 int rv = 0;
45
46 /* If this is an externally-managed array, we need to modify the
47 * metadata_version so that mdmon doesn't undo our change.
48 */
49 mdi = sysfs_read(fd, NULL, GET_LEVEL|GET_VERSION);
50 if (mdi &&
51 mdi->array.major_version == -1 &&
52 is_subarray(mdi->text_version)) {
53 char vers[64];
54 strcpy(vers, "external:");
55 strcat(vers, mdi->text_version);
56 if (readonly > 0) {
57 int rv;
58 /* We set readonly ourselves. */
59 vers[9] = '-';
60 sysfs_set_str(mdi, NULL, "metadata_version", vers);
61
62 close(fd);
63 rv = sysfs_set_str(mdi, NULL, "array_state", "readonly");
64
65 if (rv < 0) {
66 pr_err("failed to set readonly for %s: %s\n",
67 devname, strerror(errno));
68
69 vers[9] = mdi->text_version[0];
70 sysfs_set_str(mdi, NULL, "metadata_version", vers);
71 rv = 1;
72 goto out;
73 }
74 } else {
75 char *cp;
76 /* We cannot set read/write - must signal mdmon */
77 vers[9] = '/';
78 sysfs_set_str(mdi, NULL, "metadata_version", vers);
79
80 cp = strchr(vers+10, '/');
81 if (cp)
82 *cp = 0;
83 ping_monitor(vers+10);
84 if (mdi->array.level <= 0)
85 sysfs_set_str(mdi, NULL, "array_state", "active");
86 }
87 goto out;
88 }
89
90 if (!md_array_active(fd)) {
91 pr_err("%s does not appear to be active.\n", devname);
92 rv = 1;
93 goto out;
94 }
95
96 if (readonly > 0) {
97 if (ioctl(fd, STOP_ARRAY_RO, NULL)) {
98 pr_err("failed to set readonly for %s: %s\n",
99 devname, strerror(errno));
100 rv = 1;
101 goto out;
102 }
103 } else if (readonly < 0) {
104 if (ioctl(fd, RESTART_ARRAY_RW, NULL)) {
105 pr_err("failed to set writable for %s: %s\n",
106 devname, strerror(errno));
107 rv = 1;
108 goto out;
109 }
110 }
111 out:
112 sysfs_free(mdi);
113 return rv;
114 }
115
/*
 * remove_devices - unlink symlinks at 'path' that point at /dev/<devnm>.
 *
 * @devnm: kernel name of the md device (e.g. as returned by fd2devnm()).
 * @path:  name of the link for the whole device; may be NULL or empty,
 *         in which case nothing is done.
 *
 * The whole-device name is checked first, then partition suffixes 1..15.
 * A name is unlinked only if it is a symlink whose target is exactly the
 * matching /dev/<devnm>[pN] string.
 */
static void remove_devices(char *devnm, char *path)
{
	/*
	 * Remove names at 'path' - possibly with
	 * partition suffixes - which link to the 'standard'
	 * name for devnm.  These were probably created
	 * by mdadm when the array was assembled.
	 */
	char base[40];
	char *path2;
	char link[1024];
	int n;
	int part;
	char *be;
	char *pe;
	size_t base_room;

	/* An empty path has no last character to inspect and would make
	 * the partition names below relative ("1", "2", ...).
	 */
	if (!path || !path[0])
		return;

	snprintf(base, sizeof(base), "/dev/%s", devnm);
	be = base + strlen(base);
	base_room = sizeof(base) - (size_t)(be - base);

	/* 20 spare bytes leave room for the partition suffix */
	path2 = xmalloc(strlen(path)+20);
	strcpy(path2, path);
	pe = path2 + strlen(path2);

	for (part = 0; part < 16; part++) {
		if (part) {
			snprintf(be, base_room, "p%d", part);

			/* a name ending in a digit gets a 'p' separator;
			 * <ctype.h> functions need an unsigned char value
			 */
			if (isdigit((unsigned char)pe[-1]))
				snprintf(pe, 20, "p%d", part);
			else
				snprintf(pe, 20, "%d", part);
		}
		/* readlink does not NUL-terminate, so compare by length */
		n = readlink(path2, link, sizeof(link));
		if (n > 0 && (int)strlen(base) == n &&
		    strncmp(link, base, n) == 0)
			unlink(path2);
	}
	free(path2);
}
158
/*
 * Manage_run - run an already-configured array.
 *
 * Looks up the kernel device name behind @fd and hands a private copy of
 * it to IncrementalScan().  Requires >= 0.90.0.
 *
 * Returns 1 if the device name cannot be determined, otherwise whatever
 * IncrementalScan() returns.
 */
int Manage_run(char *devname, int fd, struct context *c)
{
	char nm[32];
	char *found = fd2devnm(fd);

	if (found == NULL) {
		pr_err("Cannot find %s in sysfs!!\n", devname);
		return 1;
	}

	/* work on a private copy of the name */
	strcpy(nm, found);
	return IncrementalScan(c, nm);
}
174
175 int Manage_stop(char *devname, int fd, int verbose, int will_retry)
176 {
177 /* Stop the array. Array must already be configured
178 * 'will_retry' means that error messages are not wanted.
179 */
180 int rv = 0;
181 struct map_ent *map = NULL;
182 struct mdinfo *mdi;
183 char devnm[32];
184 char container[32];
185 int err;
186 int count;
187 char buf[32];
188 unsigned long long rd1, rd2;
189
190 if (will_retry && verbose == 0)
191 verbose = -1;
192
193 strcpy(devnm, fd2devnm(fd));
194 /* Get EXCL access first. If this fails, then attempting
195 * to stop is probably a bad idea.
196 */
197 mdi = sysfs_read(fd, NULL, GET_LEVEL|GET_COMPONENT|GET_VERSION);
198 if (mdi && is_subarray(mdi->text_version)) {
199 char *sl;
200 strncpy(container, mdi->text_version+1, sizeof(container));
201 container[sizeof(container)-1] = 0;
202 sl = strchr(container, '/');
203 if (sl)
204 *sl = 0;
205 } else
206 container[0] = 0;
207 close(fd);
208 count = 5;
209 while (((fd = ((devname[0] == '/')
210 ?open(devname, O_RDONLY|O_EXCL)
211 :open_dev_flags(devnm, O_RDONLY|O_EXCL))) < 0
212 || strcmp(fd2devnm(fd), devnm) != 0)
213 && container[0]
214 && mdmon_running(container)
215 && count) {
216 /* Can't open, so something might be wrong. However it
217 * is a container, so we might be racing with mdmon, so
218 * retry for a bit.
219 */
220 if (fd >= 0)
221 close(fd);
222 flush_mdmon(container);
223 count--;
224 }
225 if (fd < 0 || strcmp(fd2devnm(fd), devnm) != 0) {
226 if (fd >= 0)
227 close(fd);
228 if (verbose >= 0)
229 pr_err("Cannot get exclusive access to %s:Perhaps a running process, mounted filesystem or active volume group?\n",
230 devname);
231 return 1;
232 }
233 /* If this is an mdmon managed array, just write 'inactive'
234 * to the array state and let mdmon clear up.
235 */
236 if (mdi &&
237 mdi->array.level > 0 &&
238 is_subarray(mdi->text_version)) {
239 int err;
240 /* This is mdmon managed. */
241 close(fd);
242
243 /* As we had an O_EXCL open, any use of the device
244 * which blocks STOP_ARRAY is probably a transient use,
245 * so it is reasonable to retry for a while - 5 seconds.
246 */
247 count = 25;
248 while (count &&
249 (err = sysfs_set_str(mdi, NULL,
250 "array_state",
251 "inactive")) < 0
252 && errno == EBUSY) {
253 usleep(200000);
254 count--;
255 }
256 if (err) {
257 if (verbose >= 0)
258 pr_err("failed to stop array %s: %s\n",
259 devname, strerror(errno));
260 rv = 1;
261 goto out;
262 }
263
264 /* Give monitor a chance to act */
265 ping_monitor(mdi->text_version);
266
267 fd = open_dev_excl(devnm);
268 if (fd < 0) {
269 if (verbose >= 0)
270 pr_err("failed to completely stop %s: Device is busy\n",
271 devname);
272 rv = 1;
273 goto out;
274 }
275 } else if (mdi &&
276 mdi->array.major_version == -1 &&
277 mdi->array.minor_version == -2 &&
278 !is_subarray(mdi->text_version)) {
279 struct mdstat_ent *mds, *m;
280 /* container, possibly mdmon-managed.
281 * Make sure mdmon isn't opening it, which
282 * would interfere with the 'stop'
283 */
284 ping_monitor(mdi->sys_name);
285
286 /* now check that there are no existing arrays
287 * which are members of this array
288 */
289 mds = mdstat_read(0, 0);
290 for (m = mds; m; m = m->next)
291 if (m->metadata_version &&
292 strncmp(m->metadata_version, "external:", 9)==0 &&
293 metadata_container_matches(m->metadata_version+9,
294 devnm)) {
295 if (verbose >= 0)
296 pr_err("Cannot stop container %s: member %s still active\n",
297 devname, m->devnm);
298 free_mdstat(mds);
299 rv = 1;
300 goto out;
301 }
302 }
303
304 /* If the array is undergoing a reshape which changes the number
305 * of devices, then it would be nice to stop it at a point where
306 * it has completed a full number of stripes in both old and
307 * new layouts as this will allow the reshape to be reverted.
308 * So if 'sync_action' is "reshape" and 'raid_disks' shows two
309 * different numbers, then
310 * - freeze reshape
311 * - set sync_max to next multiple of both data_disks and
312 * chunk sizes (or next but one)
313 * - unfreeze reshape
314 * - wait on 'sync_completed' for that point to be reached.
315 */
316 if (mdi && (mdi->array.level >= 4 && mdi->array.level <= 6) &&
317 sysfs_attribute_available(mdi, NULL, "sync_action") &&
318 sysfs_attribute_available(mdi, NULL, "reshape_direction") &&
319 sysfs_get_str(mdi, NULL, "sync_action", buf, 20) > 0 &&
320 strcmp(buf, "reshape\n") == 0 &&
321 sysfs_get_two(mdi, NULL, "raid_disks", &rd1, &rd2) == 2) {
322 unsigned long long position, curr;
323 unsigned long long chunk1, chunk2;
324 unsigned long long rddiv, chunkdiv;
325 unsigned long long sectors;
326 unsigned long long sync_max, old_sync_max;
327 unsigned long long completed;
328 int backwards = 0;
329 int delay;
330 int scfd;
331
332 delay = 40;
333 while (rd1 > rd2 && delay > 0 &&
334 sysfs_get_ll(mdi, NULL, "sync_max", &old_sync_max) == 0) {
335 /* must be in the critical section - wait a bit */
336 delay -= 1;
337 usleep(100000);
338 }
339
340 if (sysfs_set_str(mdi, NULL, "sync_action", "frozen") != 0)
341 goto done;
342 /* Array is frozen */
343
344 rd1 -= mdi->array.level == 6 ? 2 : 1;
345 rd2 -= mdi->array.level == 6 ? 2 : 1;
346 sysfs_get_str(mdi, NULL, "reshape_direction", buf, sizeof(buf));
347 if (strncmp(buf, "back", 4) == 0)
348 backwards = 1;
349 if (sysfs_get_ll(mdi, NULL, "reshape_position", &position) != 0) {
350 /* reshape must have finished now */
351 sysfs_set_str(mdi, NULL, "sync_action", "idle");
352 goto done;
353 }
354 sysfs_get_two(mdi, NULL, "chunk_size", &chunk1, &chunk2);
355 chunk1 /= 512;
356 chunk2 /= 512;
357 rddiv = GCD(rd1, rd2);
358 chunkdiv = GCD(chunk1, chunk2);
359 sectors = (chunk1/chunkdiv) * chunk2 * (rd1/rddiv) * rd2;
360
361 if (backwards) {
362 /* Need to subtract 'reshape_position' from
363 * array size to get equivalent of sync_max.
364 * Size calculation based on raid5_size in kernel.
365 */
366 unsigned long long size = mdi->component_size;
367 size &= ~(chunk1-1);
368 size &= ~(chunk2-1);
369 /* rd1 must be smaller */
370 /* Reshape may have progressed further backwards than
371 * recorded, so target even further back (hence "-1")
372 */
373 position = (position / sectors - 1) * sectors;
374 /* rd1 is always the conversion factor between 'sync'
375 * position and 'reshape' position.
376 * We read 1 "new" stripe worth of data from where-ever,
377 * and when write out that full stripe.
378 */
379 sync_max = size - position/rd1;
380 } else {
381 /* Reshape will very likely be beyond position, and it may
382 * be too late to stop at '+1', so aim for '+2'
383 */
384 position = (position / sectors + 2) * sectors;
385 sync_max = position/rd1;
386 }
387 if (sysfs_get_ll(mdi, NULL, "sync_max", &old_sync_max) < 0)
388 old_sync_max = mdi->component_size;
389 /* Must not advance sync_max as that could confuse
390 * the reshape monitor */
391 if (sync_max < old_sync_max)
392 sysfs_set_num(mdi, NULL, "sync_max", sync_max);
393 sysfs_set_str(mdi, NULL, "sync_action", "idle");
394
395 /* That should have set things going again. Now we
396 * wait a little while (3 second max) for sync_completed
397 * to reach the target.
398 * The reshape process can block for 500msec if
399 * the sync speed limit is hit, so we need to wait
400 * a lot longer than that. 1 second is usually
401 * enough. 3 is safe.
402 */
403 delay = 3000;
404 scfd = sysfs_open(mdi->sys_name, NULL, "sync_completed");
405 while (scfd >= 0 && delay > 0 && old_sync_max > 0) {
406 unsigned long long max_completed;
407 sysfs_get_ll(mdi, NULL, "reshape_position", &curr);
408 sysfs_fd_get_str(scfd, buf, sizeof(buf));
409 if (strncmp(buf, "none", 4) == 0) {
410 /* Either reshape has aborted, or hasn't
411 * quite started yet. Wait a bit and
412 * check 'sync_action' to see.
413 */
414 usleep(10000);
415 sysfs_get_str(mdi, NULL, "sync_action", buf, sizeof(buf));
416 if (strncmp(buf, "reshape", 7) != 0)
417 break;
418 }
419
420 if (sysfs_fd_get_two(scfd, &completed,
421 &max_completed) == 2 &&
422 /* 'completed' sometimes reads as max-uulong */
423 completed < max_completed &&
424 (completed > sync_max ||
425 (completed == sync_max && curr != position))) {
426 while (completed > sync_max) {
427 sync_max += sectors / rd1;
428 if (backwards)
429 position -= sectors;
430 else
431 position += sectors;
432 }
433 if (sync_max < old_sync_max)
434 sysfs_set_num(mdi, NULL, "sync_max", sync_max);
435 }
436
437 if (!backwards && curr >= position)
438 break;
439 if (backwards && curr <= position)
440 break;
441 sysfs_wait(scfd, &delay);
442 }
443 if (scfd >= 0)
444 close(scfd);
445
446 }
447 done:
448
449 /* As we have an O_EXCL open, any use of the device
450 * which blocks STOP_ARRAY is probably a transient use,
451 * so it is reasonable to retry for a while - 5 seconds.
452 */
453 count = 25; err = 0;
454 while (count && fd >= 0
455 && (err = ioctl(fd, STOP_ARRAY, NULL)) < 0
456 && errno == EBUSY) {
457 usleep(200000);
458 count --;
459 }
460 if (fd >= 0 && err) {
461 if (verbose >= 0) {
462 pr_err("failed to stop array %s: %s\n",
463 devname, strerror(errno));
464 if (errno == EBUSY)
465 cont_err("Perhaps a running process, mounted filesystem or active volume group?\n");
466 }
467 rv = 1;
468 goto out;
469 }
470
471 if (get_linux_version() < 2006028) {
472 /* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array
473 * was stopped, so We'll do it here just to be sure. Drop any
474 * partitions as well...
475 */
476 if (fd >= 0)
477 ioctl(fd, BLKRRPART, 0);
478 if (mdi)
479 sysfs_uevent(mdi, "change");
480 }
481
482 if (devnm[0] && use_udev()) {
483 struct map_ent *mp = map_by_devnm(&map, devnm);
484 remove_devices(devnm, mp ? mp->path : NULL);
485 }
486
487 if (verbose >= 0)
488 pr_err("stopped %s\n", devname);
489 map_lock(&map);
490 map_remove(&map, devnm);
491 map_unlock(&map);
492 out:
493 sysfs_free(mdi);
494
495 return rv;
496 }
497
498 static struct mddev_dev *add_one(struct mddev_dev *dv, char *name, char disp)
499 {
500 struct mddev_dev *new;
501 new = xmalloc(sizeof(*new));
502 memset(new, 0, sizeof(*new));
503 new->devname = xstrdup(name);
504 new->disposition = disp;
505 new->next = dv->next;
506 dv->next = new;
507 return new;
508 }
509
510 static void add_faulty(struct mddev_dev *dv, int fd, char disp)
511 {
512 mdu_array_info_t array;
513 mdu_disk_info_t disk;
514 int remaining_disks;
515 int i;
516
517 if (md_get_array_info(fd, &array) != 0)
518 return;
519
520 remaining_disks = array.nr_disks;
521 for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
522 char buf[40];
523 disk.number = i;
524 if (md_get_disk_info(fd, &disk) != 0)
525 continue;
526 if (disk.major == 0 && disk.minor == 0)
527 continue;
528 remaining_disks--;
529 if ((disk.state & 1) == 0) /* not faulty */
530 continue;
531 sprintf(buf, "%d:%d", disk.major, disk.minor);
532 dv = add_one(dv, buf, disp);
533 }
534 }
535
536 static void add_detached(struct mddev_dev *dv, int fd, char disp)
537 {
538 mdu_array_info_t array;
539 mdu_disk_info_t disk;
540 int remaining_disks;
541 int i;
542
543 if (md_get_array_info(fd, &array) != 0)
544 return;
545
546 remaining_disks = array.nr_disks;
547 for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
548 char buf[40];
549 int sfd;
550 disk.number = i;
551 if (md_get_disk_info(fd, &disk) != 0)
552 continue;
553 if (disk.major == 0 && disk.minor == 0)
554 continue;
555 remaining_disks--;
556 if (disp == 'f' && (disk.state & 1) != 0) /* already faulty */
557 continue;
558 sprintf(buf, "%d:%d", disk.major, disk.minor);
559 sfd = dev_open(buf, O_RDONLY);
560 if (sfd >= 0) {
561 /* Not detached */
562 close(sfd);
563 continue;
564 }
565 if (errno != ENXIO)
566 /* Probably not detached */
567 continue;
568 dv = add_one(dv, buf, disp);
569 }
570 }
571
572 static void add_set(struct mddev_dev *dv, int fd, char set_char)
573 {
574 mdu_array_info_t array;
575 mdu_disk_info_t disk;
576 int remaining_disks;
577 int copies, set;
578 int i;
579
580 if (md_get_array_info(fd, &array) != 0)
581 return;
582 if (array.level != 10)
583 return;
584 copies = ((array.layout & 0xff) *
585 ((array.layout >> 8) & 0xff));
586 if (array.raid_disks % copies)
587 return;
588
589 remaining_disks = array.nr_disks;
590 for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
591 char buf[40];
592 disk.number = i;
593 if (md_get_disk_info(fd, &disk) != 0)
594 continue;
595 if (disk.major == 0 && disk.minor == 0)
596 continue;
597 remaining_disks--;
598 set = disk.raid_disk % copies;
599 if (set_char != set + 'A')
600 continue;
601 sprintf(buf, "%d:%d", disk.major, disk.minor);
602 dv = add_one(dv, buf, dv->disposition);
603 }
604 }
605
606 int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
607 struct supertype *dev_st, struct supertype *tst,
608 unsigned long rdev,
609 char *update, char *devname, int verbose,
610 mdu_array_info_t *array)
611 {
612 struct mdinfo mdi;
613 int duuid[4];
614 int ouuid[4];
615
616 dev_st->ss->getinfo_super(dev_st, &mdi, NULL);
617 dev_st->ss->uuid_from_super(dev_st, ouuid);
618 if (tst->sb)
619 tst->ss->uuid_from_super(tst, duuid);
620 else
621 /* Assume uuid matches: kernel will check */
622 memcpy(duuid, ouuid, sizeof(ouuid));
623 if ((mdi.disk.state & (1<<MD_DISK_ACTIVE)) &&
624 !(mdi.disk.state & (1<<MD_DISK_FAULTY)) &&
625 memcmp(duuid, ouuid, sizeof(ouuid))==0) {
626 /* Looks like it is worth a
627 * try. Need to make sure
628 * kernel will accept it
629 * though.
630 */
631 mdu_disk_info_t disc;
632 /* re-add doesn't work for version-1 superblocks
633 * before 2.6.18 :-(
634 */
635 if (array->major_version == 1 &&
636 get_linux_version() <= 2006018)
637 goto skip_re_add;
638 disc.number = mdi.disk.number;
639 if (md_get_disk_info(fd, &disc) != 0 ||
640 disc.major != 0 || disc.minor != 0)
641 goto skip_re_add;
642 disc.major = major(rdev);
643 disc.minor = minor(rdev);
644 disc.number = mdi.disk.number;
645 disc.raid_disk = mdi.disk.raid_disk;
646 disc.state = mdi.disk.state;
647 if (array->state & (1 << MD_SB_CLUSTERED)) {
648 /* extra flags are needed when adding to a cluster as
649 * there are two cases to distinguish
650 */
651 if (dv->disposition == 'c')
652 disc.state |= (1 << MD_DISK_CANDIDATE);
653 else
654 disc.state |= (1 << MD_DISK_CLUSTER_ADD);
655 }
656 if (dv->writemostly == FlagSet)
657 disc.state |= 1 << MD_DISK_WRITEMOSTLY;
658 if (dv->writemostly == FlagClear)
659 disc.state &= ~(1 << MD_DISK_WRITEMOSTLY);
660 if (dv->failfast == FlagSet)
661 disc.state |= 1 << MD_DISK_FAILFAST;
662 if (dv->failfast == FlagClear)
663 disc.state &= ~(1 << MD_DISK_FAILFAST);
664 remove_partitions(tfd);
665 if (update || dv->writemostly != FlagDefault
666 || dv->failfast != FlagDefault) {
667 int rv = -1;
668 tfd = dev_open(dv->devname, O_RDWR);
669 if (tfd < 0) {
670 pr_err("failed to open %s for superblock update during re-add\n", dv->devname);
671 return -1;
672 }
673
674 if (dv->writemostly == FlagSet)
675 rv = dev_st->ss->update_super(
676 dev_st, NULL, "writemostly",
677 devname, verbose, 0, NULL);
678 if (dv->writemostly == FlagClear)
679 rv = dev_st->ss->update_super(
680 dev_st, NULL, "readwrite",
681 devname, verbose, 0, NULL);
682 if (dv->failfast == FlagSet)
683 rv = dev_st->ss->update_super(
684 dev_st, NULL, "failfast",
685 devname, verbose, 0, NULL);
686 if (dv->failfast == FlagClear)
687 rv = dev_st->ss->update_super(
688 dev_st, NULL, "nofailfast",
689 devname, verbose, 0, NULL);
690 if (update)
691 rv = dev_st->ss->update_super(
692 dev_st, NULL, update,
693 devname, verbose, 0, NULL);
694 if (rv == 0)
695 rv = dev_st->ss->store_super(dev_st, tfd);
696 close(tfd);
697 if (rv != 0) {
698 pr_err("failed to update superblock during re-add\n");
699 return -1;
700 }
701 }
702 /* don't even try if disk is marked as faulty */
703 errno = 0;
704 if (ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
705 if (verbose >= 0)
706 pr_err("re-added %s\n", dv->devname);
707 return 1;
708 }
709 if (errno == ENOMEM || errno == EROFS) {
710 pr_err("add new device failed for %s: %s\n",
711 dv->devname, strerror(errno));
712 if (dv->disposition == 'M')
713 return 0;
714 return -1;
715 }
716 }
717 skip_re_add:
718 return 0;
719 }
720
721 int Manage_add(int fd, int tfd, struct mddev_dev *dv,
722 struct supertype *tst, mdu_array_info_t *array,
723 int force, int verbose, char *devname,
724 char *update, unsigned long rdev, unsigned long long array_size,
725 int raid_slot)
726 {
727 unsigned long long ldsize;
728 struct supertype *dev_st;
729 int j;
730 mdu_disk_info_t disc;
731
732 if (!get_dev_size(tfd, dv->devname, &ldsize)) {
733 if (dv->disposition == 'M')
734 return 0;
735 else
736 return -1;
737 }
738
739 if (tst->ss == &super0 && ldsize > 4ULL*1024*1024*1024*1024) {
740 /* More than 4TB is wasted on v0.90 */
741 if (!force) {
742 pr_err("%s is larger than %s can effectively use.\n"
743 " Add --force is you really want to add this device.\n",
744 dv->devname, devname);
745 return -1;
746 }
747 pr_err("%s is larger than %s can effectively use.\n"
748 " Adding anyway as --force was given.\n",
749 dv->devname, devname);
750 }
751 if (!tst->ss->external && array->major_version == 0) {
752 if (ioctl(fd, HOT_ADD_DISK, rdev)==0) {
753 if (verbose >= 0)
754 pr_err("hot added %s\n",
755 dv->devname);
756 return 1;
757 }
758
759 pr_err("hot add failed for %s: %s\n",
760 dv->devname, strerror(errno));
761 return -1;
762 }
763
764 if (array->not_persistent == 0 || tst->ss->external) {
765
766 /* need to find a sample superblock to copy, and
767 * a spare slot to use.
768 * For 'external' array (well, container based),
769 * We can just load the metadata for the array->
770 */
771 int array_failed;
772 if (tst->sb)
773 /* already loaded */;
774 else if (tst->ss->external) {
775 tst->ss->load_container(tst, fd, NULL);
776 } else for (j = 0; j < tst->max_devs; j++) {
777 char *dev;
778 int dfd;
779 disc.number = j;
780 if (md_get_disk_info(fd, &disc))
781 continue;
782 if (disc.major==0 && disc.minor==0)
783 continue;
784 if ((disc.state & 4)==0) /* sync */
785 continue;
786 /* Looks like a good device to try */
787 dev = map_dev(disc.major, disc.minor, 1);
788 if (!dev)
789 continue;
790 dfd = dev_open(dev, O_RDONLY);
791 if (dfd < 0)
792 continue;
793 if (tst->ss->load_super(tst, dfd,
794 NULL)) {
795 close(dfd);
796 continue;
797 }
798 close(dfd);
799 break;
800 }
801 /* FIXME this is a bad test to be using */
802 if (!tst->sb && (dv->disposition != 'a'
803 && dv->disposition != 'S')) {
804 /* we are re-adding a device to a
805 * completely dead array - have to depend
806 * on kernel to check
807 */
808 } else if (!tst->sb) {
809 pr_err("cannot load array metadata from %s\n", devname);
810 return -1;
811 }
812
813 /* Make sure device is large enough */
814 if (dv->disposition != 'j' && /* skip size check for Journal */
815 tst->sb &&
816 tst->ss->avail_size(tst, ldsize/512, INVALID_SECTORS) <
817 array_size) {
818 if (dv->disposition == 'M')
819 return 0;
820 pr_err("%s not large enough to join array\n",
821 dv->devname);
822 return -1;
823 }
824
825 /* Possibly this device was recently part of
826 * the array and was temporarily removed, and
827 * is now being re-added. If so, we can
828 * simply re-add it.
829 */
830
831 if (array->not_persistent == 0) {
832 dev_st = dup_super(tst);
833 dev_st->ss->load_super(dev_st, tfd, NULL);
834 if (dev_st->sb && dv->disposition != 'S') {
835 int rv;
836
837 rv = attempt_re_add(fd, tfd, dv, dev_st, tst,
838 rdev, update, devname,
839 verbose, array);
840 dev_st->ss->free_super(dev_st);
841 if (rv)
842 return rv;
843 }
844 }
845 if (dv->disposition == 'M') {
846 if (verbose > 0)
847 pr_err("--re-add for %s to %s is not possible\n",
848 dv->devname, devname);
849 return 0;
850 }
851 if (dv->disposition == 'A') {
852 pr_err("--re-add for %s to %s is not possible\n",
853 dv->devname, devname);
854 return -1;
855 }
856 if (array->active_disks < array->raid_disks) {
857 char *avail = xcalloc(array->raid_disks, 1);
858 int d;
859 int found = 0;
860
861 for (d = 0; d < MAX_DISKS && found < array->nr_disks; d++) {
862 disc.number = d;
863 if (md_get_disk_info(fd, &disc))
864 continue;
865 if (disc.major == 0 && disc.minor == 0)
866 continue;
867 if (!(disc.state & (1<<MD_DISK_SYNC)))
868 continue;
869 avail[disc.raid_disk] = 1;
870 found++;
871 }
872 array_failed = !enough(array->level, array->raid_disks,
873 array->layout, 1, avail);
874 free(avail);
875 } else
876 array_failed = 0;
877 if (array_failed) {
878 pr_err("%s has failed so using --add cannot work and might destroy\n",
879 devname);
880 pr_err("data on %s. You should stop the array and re-assemble it.\n",
881 dv->devname);
882 return -1;
883 }
884 } else {
885 /* non-persistent. Must ensure that new drive
886 * is at least array->size big.
887 */
888 if (ldsize/512 < array_size) {
889 pr_err("%s not large enough to join array\n",
890 dv->devname);
891 return -1;
892 }
893 }
894 /* committed to really trying this device now*/
895 remove_partitions(tfd);
896
897 /* in 2.6.17 and earlier, version-1 superblocks won't
898 * use the number we write, but will choose a free number.
899 * we must choose the same free number, which requires
900 * starting at 'raid_disks' and counting up
901 */
902 for (j = array->raid_disks; j < tst->max_devs; j++) {
903 disc.number = j;
904 if (md_get_disk_info(fd, &disc))
905 break;
906 if (disc.major==0 && disc.minor==0)
907 break;
908 if (disc.state & 8) /* removed */
909 break;
910 }
911 disc.major = major(rdev);
912 disc.minor = minor(rdev);
913 if (raid_slot < 0)
914 disc.number = j;
915 else
916 disc.number = raid_slot;
917 disc.state = 0;
918
919 /* only add journal to array that supports journaling */
920 if (dv->disposition == 'j') {
921 struct mdinfo mdi;
922 struct mdinfo *mdp;
923
924 mdp = sysfs_read(fd, NULL, GET_ARRAY_STATE);
925 if (!mdp) {
926 pr_err("%s unable to read array state.\n", devname);
927 return -1;
928 }
929
930 if (mdp->array_state != ARRAY_READONLY) {
931 sysfs_free(mdp);
932 pr_err("%s is not readonly, cannot add journal.\n", devname);
933 return -1;
934 }
935
936 sysfs_free(mdp);
937
938 tst->ss->getinfo_super(tst, &mdi, NULL);
939 if (mdi.journal_device_required == 0) {
940 pr_err("%s does not support journal device.\n", devname);
941 return -1;
942 }
943 disc.raid_disk = 0;
944 }
945
946 if (array->not_persistent==0) {
947 int dfd;
948 if (dv->disposition == 'j')
949 disc.state |= (1 << MD_DISK_JOURNAL) | (1 << MD_DISK_SYNC);
950 if (dv->writemostly == FlagSet)
951 disc.state |= 1 << MD_DISK_WRITEMOSTLY;
952 if (dv->failfast == FlagSet)
953 disc.state |= 1 << MD_DISK_FAILFAST;
954 dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
955 if (tst->ss->add_to_super(tst, &disc, dfd,
956 dv->devname, INVALID_SECTORS))
957 return -1;
958 if (tst->ss->write_init_super(tst))
959 return -1;
960 } else if (dv->disposition == 'A') {
961 /* this had better be raid1.
962 * As we are "--re-add"ing we must find a spare slot
963 * to fill.
964 */
965 char *used = xcalloc(array->raid_disks, 1);
966 for (j = 0; j < tst->max_devs; j++) {
967 mdu_disk_info_t disc2;
968 disc2.number = j;
969 if (md_get_disk_info(fd, &disc2))
970 continue;
971 if (disc2.major==0 && disc2.minor==0)
972 continue;
973 if (disc2.state & 8) /* removed */
974 continue;
975 if (disc2.raid_disk < 0)
976 continue;
977 if (disc2.raid_disk > array->raid_disks)
978 continue;
979 used[disc2.raid_disk] = 1;
980 }
981 for (j = 0 ; j < array->raid_disks; j++)
982 if (!used[j]) {
983 disc.raid_disk = j;
984 disc.state |= (1<<MD_DISK_SYNC);
985 break;
986 }
987 free(used);
988 }
989
990 if (array->state & (1 << MD_SB_CLUSTERED)) {
991 if (dv->disposition == 'c')
992 disc.state |= (1 << MD_DISK_CANDIDATE);
993 else
994 disc.state |= (1 << MD_DISK_CLUSTER_ADD);
995 }
996
997 if (dv->writemostly == FlagSet)
998 disc.state |= (1 << MD_DISK_WRITEMOSTLY);
999 if (dv->failfast == FlagSet)
1000 disc.state |= (1 << MD_DISK_FAILFAST);
1001 if (tst->ss->external) {
1002 /* add a disk
1003 * to an external metadata container */
1004 struct mdinfo new_mdi;
1005 struct mdinfo *sra;
1006 int container_fd;
1007 char devnm[32];
1008 int dfd;
1009
1010 strcpy(devnm, fd2devnm(fd));
1011
1012 container_fd = open_dev_excl(devnm);
1013 if (container_fd < 0) {
1014 pr_err("add failed for %s: could not get exclusive access to container\n",
1015 dv->devname);
1016 tst->ss->free_super(tst);
1017 return -1;
1018 }
1019
1020 Kill(dv->devname, NULL, 0, -1, 0);
1021 dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
1022 if (mdmon_running(tst->container_devnm))
1023 tst->update_tail = &tst->updates;
1024 if (tst->ss->add_to_super(tst, &disc, dfd,
1025 dv->devname, INVALID_SECTORS)) {
1026 close(dfd);
1027 close(container_fd);
1028 return -1;
1029 }
1030 if (tst->update_tail)
1031 flush_metadata_updates(tst);
1032 else
1033 tst->ss->sync_metadata(tst);
1034
1035 sra = sysfs_read(container_fd, NULL, 0);
1036 if (!sra) {
1037 pr_err("add failed for %s: sysfs_read failed\n",
1038 dv->devname);
1039 close(container_fd);
1040 tst->ss->free_super(tst);
1041 return -1;
1042 }
1043 sra->array.level = LEVEL_CONTAINER;
1044 /* Need to set data_offset and component_size */
1045 tst->ss->getinfo_super(tst, &new_mdi, NULL);
1046 new_mdi.disk.major = disc.major;
1047 new_mdi.disk.minor = disc.minor;
1048 new_mdi.recovery_start = 0;
1049 /* Make sure fds are closed as they are O_EXCL which
1050 * would block add_disk */
1051 tst->ss->free_super(tst);
1052 if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
1053 pr_err("add new device to external metadata failed for %s\n", dv->devname);
1054 close(container_fd);
1055 sysfs_free(sra);
1056 return -1;
1057 }
1058 ping_monitor(devnm);
1059 sysfs_free(sra);
1060 close(container_fd);
1061 } else {
1062 tst->ss->free_super(tst);
1063 if (ioctl(fd, ADD_NEW_DISK, &disc)) {
1064 if (dv->disposition == 'j')
1065 pr_err("Failed to hot add %s as journal, "
1066 "please try restart %s.\n", dv->devname, devname);
1067 else
1068 pr_err("add new device failed for %s as %d: %s\n",
1069 dv->devname, j, strerror(errno));
1070 return -1;
1071 }
1072 if (dv->disposition == 'j') {
1073 pr_err("Journal added successfully, making %s read-write\n", devname);
1074 if (Manage_ro(devname, fd, -1))
1075 pr_err("Failed to make %s read-write\n", devname);
1076 }
1077
1078 }
1079 if (verbose >= 0)
1080 pr_err("added %s\n", dv->devname);
1081 return 1;
1082 }
1083
1084 int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
1085 int sysfd, unsigned long rdev, int force, int verbose, char *devname)
1086 {
1087 int lfd = -1;
1088 int err;
1089
1090 if (tst->ss->external) {
1091 /* To remove a device from a container, we must
1092 * check that it isn't in use in an array.
1093 * This involves looking in the 'holders'
1094 * directory - there must be just one entry,
1095 * the container.
1096 * To ensure that it doesn't get used as a
1097 * hot spare while we are checking, we
1098 * get an O_EXCL open on the container
1099 */
1100 int ret;
1101 char devnm[32];
1102 strcpy(devnm, fd2devnm(fd));
1103 lfd = open_dev_excl(devnm);
1104 if (lfd < 0) {
1105 pr_err("Cannot get exclusive access to container - odd\n");
1106 return -1;
1107 }
1108 /* We may not be able to check on holders in
1109 * sysfs, either because we don't have the dev num
1110 * (rdev == 0) or because the device has been detached
1111 * and the 'holders' directory no longer exists
1112 * (ret == -1). In that case, assume it is OK to
1113 * remove.
1114 */
1115 if (rdev == 0)
1116 ret = -1;
1117 else {
1118 /*
1119 * The drive has already been set to 'faulty', however
1120 * monitor might not have had time to process it and the
1121 * drive might still have an entry in the 'holders'
1122 * directory. Try a few times to avoid a false error
1123 */
1124 int count = 20;
1125
1126 do {
1127 ret = sysfs_unique_holder(devnm, rdev);
1128 if (ret < 2)
1129 break;
1130 usleep(100 * 1000); /* 100ms */
1131 } while (--count > 0);
1132
1133 if (ret == 0) {
1134 pr_err("%s is not a member, cannot remove.\n",
1135 dv->devname);
1136 close(lfd);
1137 return -1;
1138 }
1139 if (ret >= 2) {
1140 pr_err("%s is still in use, cannot remove.\n",
1141 dv->devname);
1142 close(lfd);
1143 return -1;
1144 }
1145 }
1146 }
1147 /* FIXME check that it is a current member */
1148 if (sysfd >= 0) {
1149 /* device has been removed and we don't know
1150 * the major:minor number
1151 */
1152 err = sys_hot_remove_disk(sysfd, force);
1153 } else {
1154 err = hot_remove_disk(fd, rdev, force);
1155 if (err && errno == ENODEV) {
1156 /* Old kernels rejected this if no personality
1157 * is registered */
1158 struct mdinfo *sra = sysfs_read(fd, NULL, GET_DEVS);
1159 struct mdinfo *dv = NULL;
1160 if (sra)
1161 dv = sra->devs;
1162 for ( ; dv ; dv=dv->next)
1163 if (dv->disk.major == (int)major(rdev) &&
1164 dv->disk.minor == (int)minor(rdev))
1165 break;
1166 if (dv)
1167 err = sysfs_set_str(sra, dv,
1168 "state", "remove");
1169 else
1170 err = -1;
1171 sysfs_free(sra);
1172 }
1173 }
1174 if (err) {
1175 pr_err("hot remove failed for %s: %s\n", dv->devname,
1176 strerror(errno));
1177 if (lfd >= 0)
1178 close(lfd);
1179 return -1;
1180 }
1181 if (tst->ss->external) {
1182 /*
1183 * Before dropping our exclusive open we make an
1184 * attempt at preventing mdmon from seeing an
1185 * 'add' event before reconciling this 'remove'
1186 * event.
1187 */
1188 char *devnm = fd2devnm(fd);
1189
1190 if (!devnm) {
1191 pr_err("unable to get container name\n");
1192 return -1;
1193 }
1194
1195 ping_manager(devnm);
1196 }
1197 if (lfd >= 0)
1198 close(lfd);
1199 if (verbose >= 0)
1200 pr_err("hot removed %s from %s\n",
1201 dv->devname, devname);
1202 return 1;
1203 }
1204
1205 int Manage_replace(struct supertype *tst, int fd, struct mddev_dev *dv,
1206 unsigned long rdev, int verbose, char *devname)
1207 {
1208 struct mdinfo *mdi, *di;
1209 if (tst->ss->external) {
1210 pr_err("--replace only supported for native metadata (0.90 or 1.x)\n");
1211 return -1;
1212 }
1213 /* Need to find the device in sysfs and add 'want_replacement' to the
1214 * status.
1215 */
1216 mdi = sysfs_read(fd, NULL, GET_DEVS);
1217 if (!mdi || !mdi->devs) {
1218 pr_err("Cannot find status of %s to enable replacement - strange\n",
1219 devname);
1220 return -1;
1221 }
1222 for (di = mdi->devs; di; di = di->next)
1223 if (di->disk.major == (int)major(rdev) &&
1224 di->disk.minor == (int)minor(rdev))
1225 break;
1226 if (di) {
1227 int rv;
1228 if (di->disk.raid_disk < 0) {
1229 pr_err("%s is not active and so cannot be replaced.\n",
1230 dv->devname);
1231 sysfs_free(mdi);
1232 return -1;
1233 }
1234 rv = sysfs_set_str(mdi, di,
1235 "state", "want_replacement");
1236 if (rv) {
1237 sysfs_free(mdi);
1238 pr_err("Failed to request replacement for %s\n",
1239 dv->devname);
1240 return -1;
1241 }
1242 if (verbose >= 0)
1243 pr_err("Marked %s (device %d in %s) for replacement\n",
1244 dv->devname, di->disk.raid_disk, devname);
1245 /* If there is a matching 'with', we need to tell it which
1246 * raid disk
1247 */
1248 while (dv && dv->disposition != 'W')
1249 dv = dv->next;
1250 if (dv) {
1251 dv->disposition = 'w';
1252 dv->used = di->disk.raid_disk;
1253 }
1254 return 1;
1255 }
1256 sysfs_free(mdi);
1257 pr_err("%s not found in %s so cannot --replace it\n",
1258 dv->devname, devname);
1259 return -1;
1260 }
1261
1262 int Manage_with(struct supertype *tst, int fd, struct mddev_dev *dv,
1263 unsigned long rdev, int verbose, char *devname)
1264 {
1265 struct mdinfo *mdi, *di;
1266 /* try to set 'slot' for 'rdev' in 'fd' to 'dv->used' */
1267 mdi = sysfs_read(fd, NULL, GET_DEVS|GET_STATE);
1268 if (!mdi || !mdi->devs) {
1269 pr_err("Cannot find status of %s to enable replacement - strange\n",
1270 devname);
1271 return -1;
1272 }
1273 for (di = mdi->devs; di; di = di->next)
1274 if (di->disk.major == (int)major(rdev) &&
1275 di->disk.minor == (int)minor(rdev))
1276 break;
1277 if (di) {
1278 int rv;
1279 if (di->disk.state & (1<<MD_DISK_FAULTY)) {
1280 pr_err("%s is faulty and cannot be a replacement\n",
1281 dv->devname);
1282 sysfs_free(mdi);
1283 return -1;
1284 }
1285 if (di->disk.raid_disk >= 0) {
1286 pr_err("%s is active and cannot be a replacement\n",
1287 dv->devname);
1288 sysfs_free(mdi);
1289 return -1;
1290 }
1291 rv = sysfs_set_num(mdi, di,
1292 "slot", dv->used);
1293 if (rv) {
1294 sysfs_free(mdi);
1295 pr_err("Failed to set %s as preferred replacement.\n",
1296 dv->devname);
1297 return -1;
1298 }
1299 if (verbose >= 0)
1300 pr_err("Marked %s in %s as replacement for device %d\n",
1301 dv->devname, devname, dv->used);
1302 return 1;
1303 }
1304 sysfs_free(mdi);
1305 pr_err("%s not found in %s so cannot make it preferred replacement\n",
1306 dv->devname, devname);
1307 return -1;
1308 }
1309
1310 int Manage_subdevs(char *devname, int fd,
1311 struct mddev_dev *devlist, int verbose, int test,
1312 char *update, int force)
1313 {
1314 /* Do something to each dev.
1315 * devmode can be
1316 * 'a' - add the device
1317 * try HOT_ADD_DISK
1318 * If that fails EINVAL, try ADD_NEW_DISK
1319 * 'S' - add the device as a spare - don't try re-add
1320 * 'j' - add the device as a journal device
1321 * 'A' - re-add the device
1322 * 'r' - remove the device: HOT_REMOVE_DISK
1323 * device can be 'faulty' or 'detached' in which case all
1324 * matching devices are removed.
1325 * 'f' - set the device faulty SET_DISK_FAULTY
1326 * device can be 'detached' in which case any device that
1327 * is inaccessible will be marked faulty.
1328 * 'R' - mark this device as wanting replacement.
1329 * 'W' - this device is added if necessary and activated as
1330 * a replacement for a previous 'R' device.
1331 * -----
1332 * 'w' - 'W' will be changed to 'w' when it is paired with
1333 * a 'R' device. If a 'W' is found while walking the list
1334 * it must be unpaired, and is an error.
1335 * 'M' - this is created by a 'missing' target. It is a slight
1336 * variant on 'A'
1337 * 'F' - Another variant of 'A', where the device was faulty
1338 * so must be removed from the array first.
1339 * 'c' - confirm the device as found (for clustered environments)
1340 *
1341 * For 'f' and 'r', the device can also be a kernel-internal
1342 * name such as 'sdb'.
1343 */
1344 mdu_array_info_t array;
1345 unsigned long long array_size;
1346 struct mddev_dev *dv;
1347 int tfd = -1;
1348 struct supertype *tst;
1349 char *subarray = NULL;
1350 int sysfd = -1;
1351 int count = 0; /* number of actions taken */
1352 struct mdinfo info;
1353 struct mdinfo devinfo;
1354 int frozen = 0;
1355 int busy = 0;
1356 int raid_slot = -1;
1357
1358 if (sysfs_init(&info, fd, NULL)) {
1359 pr_err("sysfs not availabile for %s\n", devname);
1360 goto abort;
1361 }
1362
1363 if (md_get_array_info(fd, &array)) {
1364 pr_err("Cannot get array info for %s\n", devname);
1365 goto abort;
1366 }
1367 /* array.size is only 32 bits and may be truncated.
1368 * So read from sysfs if possible, and record number of sectors
1369 */
1370
1371 array_size = get_component_size(fd);
1372 if (array_size <= 0)
1373 array_size = array.size * 2;
1374
1375 tst = super_by_fd(fd, &subarray);
1376 if (!tst) {
1377 pr_err("unsupport array - version %d.%d\n",
1378 array.major_version, array.minor_version);
1379 goto abort;
1380 }
1381
1382 for (dv = devlist; dv; dv = dv->next) {
1383 unsigned long rdev = 0; /* device to add/remove etc */
1384 int rv;
1385 int mj,mn;
1386
1387 raid_slot = -1;
1388 if (dv->disposition == 'c') {
1389 rv = parse_cluster_confirm_arg(dv->devname,
1390 &dv->devname,
1391 &raid_slot);
1392 if (rv) {
1393 pr_err("Could not get the devname of cluster\n");
1394 goto abort;
1395 }
1396 }
1397
1398 if (strcmp(dv->devname, "failed") == 0 ||
1399 strcmp(dv->devname, "faulty") == 0) {
1400 if (dv->disposition != 'A'
1401 && dv->disposition != 'r') {
1402 pr_err("%s only meaningful with -r or --re-add, not -%c\n",
1403 dv->devname, dv->disposition);
1404 goto abort;
1405 }
1406 add_faulty(dv, fd, (dv->disposition == 'A'
1407 ? 'F' : 'r'));
1408 continue;
1409 }
1410 if (strcmp(dv->devname, "detached") == 0) {
1411 if (dv->disposition != 'r' && dv->disposition != 'f') {
1412 pr_err("%s only meaningful with -r of -f, not -%c\n",
1413 dv->devname, dv->disposition);
1414 goto abort;
1415 }
1416 add_detached(dv, fd, dv->disposition);
1417 continue;
1418 }
1419
1420 if (strcmp(dv->devname, "missing") == 0) {
1421 struct mddev_dev *add_devlist;
1422 struct mddev_dev **dp;
1423 if (dv->disposition == 'c') {
1424 rv = ioctl(fd, CLUSTERED_DISK_NACK, NULL);
1425 break;
1426 }
1427
1428 if (dv->disposition != 'A') {
1429 pr_err("'missing' only meaningful with --re-add\n");
1430 goto abort;
1431 }
1432 add_devlist = conf_get_devs();
1433 if (add_devlist == NULL) {
1434 pr_err("no devices to scan for missing members.");
1435 continue;
1436 }
1437 for (dp = &add_devlist; *dp; dp = & (*dp)->next)
1438 /* 'M' (for 'missing') is like 'A' without errors */
1439 (*dp)->disposition = 'M';
1440 *dp = dv->next;
1441 dv->next = add_devlist;
1442 continue;
1443 }
1444
1445 if (strncmp(dv->devname, "set-", 4) == 0 &&
1446 strlen(dv->devname) == 5) {
1447 int copies;
1448
1449 if (dv->disposition != 'r' &&
1450 dv->disposition != 'f') {
1451 pr_err("'%s' only meaningful with -r or -f\n",
1452 dv->devname);
1453 goto abort;
1454 }
1455 if (array.level != 10) {
1456 pr_err("'%s' only meaningful with RAID10 arrays\n",
1457 dv->devname);
1458 goto abort;
1459 }
1460 copies = ((array.layout & 0xff) *
1461 ((array.layout >> 8) & 0xff));
1462 if (array.raid_disks % copies != 0 ||
1463 dv->devname[4] < 'A' ||
1464 dv->devname[4] >= 'A' + copies ||
1465 copies > 26) {
1466 pr_err("'%s' not meaningful with this array\n",
1467 dv->devname);
1468 goto abort;
1469 }
1470 add_set(dv, fd, dv->devname[4]);
1471 continue;
1472 }
1473
1474 if (strchr(dv->devname, '/') == NULL &&
1475 strchr(dv->devname, ':') == NULL &&
1476 strlen(dv->devname) < 50) {
1477 /* Assume this is a kernel-internal name like 'sda1' */
1478 int found = 0;
1479 char dname[55];
1480 if (dv->disposition != 'r' && dv->disposition != 'f') {
1481 pr_err("%s only meaningful with -r or -f, not -%c\n",
1482 dv->devname, dv->disposition);
1483 goto abort;
1484 }
1485
1486 sprintf(dname, "dev-%s", dv->devname);
1487 sysfd = sysfs_open(fd2devnm(fd), dname, "block/dev");
1488 if (sysfd >= 0) {
1489 char dn[20];
1490 if (sysfs_fd_get_str(sysfd, dn, 20) > 0 &&
1491 sscanf(dn, "%d:%d", &mj,&mn) == 2) {
1492 rdev = makedev(mj,mn);
1493 found = 1;
1494 }
1495 close(sysfd);
1496 sysfd = -1;
1497 }
1498 if (!found) {
1499 sysfd = sysfs_open(fd2devnm(fd), dname, "state");
1500 if (sysfd < 0) {
1501 pr_err("%s does not appear to be a component of %s\n",
1502 dv->devname, devname);
1503 goto abort;
1504 }
1505 }
1506 } else if ((dv->disposition == 'r' || dv->disposition == 'f')
1507 && get_maj_min(dv->devname, &mj, &mn)) {
1508 /* for 'fail' and 'remove', the device might
1509 * not exist.
1510 */
1511 rdev = makedev(mj, mn);
1512 } else {
1513 struct stat stb;
1514 tfd = dev_open(dv->devname, O_RDONLY);
1515 if (tfd >= 0) {
1516 fstat_is_blkdev(tfd, dv->devname, &rdev);
1517 close(tfd);
1518 } else {
1519 int open_err = errno;
1520 if (stat(dv->devname, &stb) != 0) {
1521 pr_err("Cannot find %s: %s\n",
1522 dv->devname, strerror(errno));
1523 goto abort;
1524 }
1525 if ((stb.st_mode & S_IFMT) != S_IFBLK) {
1526 if (dv->disposition == 'M')
1527 /* non-fatal. Also improbable */
1528 continue;
1529 pr_err("%s is not a block device.\n",
1530 dv->devname);
1531 goto abort;
1532 }
1533 if (dv->disposition == 'r')
1534 /* Be happy, the stat worked, that is
1535 * enough for --remove
1536 */
1537 ;
1538 else {
1539 if (dv->disposition == 'M')
1540 /* non-fatal */
1541 continue;
1542 pr_err("Cannot open %s: %s\n",
1543 dv->devname, strerror(open_err));
1544 goto abort;
1545 }
1546 }
1547 rdev = stb.st_rdev;
1548 }
1549 switch(dv->disposition){
1550 default:
1551 pr_err("internal error - devmode[%s]=%d\n",
1552 dv->devname, dv->disposition);
1553 goto abort;
1554 case 'a':
1555 case 'S': /* --add-spare */
1556 case 'j': /* --add-journal */
1557 case 'A':
1558 case 'M': /* --re-add missing */
1559 case 'F': /* --re-add faulty */
1560 case 'c': /* --cluster-confirm */
1561 /* add the device */
1562 if (subarray) {
1563 pr_err("Cannot add disks to a \'member\' array, perform this operation on the parent container\n");
1564 goto abort;
1565 }
1566
1567 /* Let's first try to write re-add to sysfs */
1568 if (rdev != 0 &&
1569 (dv->disposition == 'A' || dv->disposition == 'F')) {
1570 sysfs_init_dev(&devinfo, rdev);
1571 if (sysfs_set_str(&info, &devinfo, "state", "re-add") == 0) {
1572 pr_err("re-add %s to %s succeed\n",
1573 dv->devname, info.sys_name);
1574 break;
1575 }
1576 }
1577
1578 if (dv->disposition == 'F')
1579 /* Need to remove first */
1580 hot_remove_disk(fd, rdev, force);
1581 /* Make sure it isn't in use (in 2.6 or later) */
1582 tfd = dev_open(dv->devname, O_RDONLY|O_EXCL);
1583 if (tfd >= 0) {
1584 /* We know no-one else is using it. We'll
1585 * need non-exclusive access to add it, so
1586 * do that now.
1587 */
1588 close(tfd);
1589 tfd = dev_open(dv->devname, O_RDONLY);
1590 }
1591 if (tfd < 0) {
1592 if (dv->disposition == 'M')
1593 continue;
1594 pr_err("Cannot open %s: %s\n",
1595 dv->devname, strerror(errno));
1596 goto abort;
1597 }
1598 if (!frozen) {
1599 if (sysfs_freeze_array(&info) == 1)
1600 frozen = 1;
1601 else
1602 frozen = -1;
1603 }
1604 rv = Manage_add(fd, tfd, dv, tst, &array,
1605 force, verbose, devname, update,
1606 rdev, array_size, raid_slot);
1607 close(tfd);
1608 tfd = -1;
1609 if (rv < 0)
1610 goto abort;
1611 if (rv > 0)
1612 count++;
1613 break;
1614
1615 case 'r':
1616 /* hot remove */
1617 if (subarray) {
1618 pr_err("Cannot remove disks from a \'member\' array, perform this operation on the parent container\n");
1619 rv = -1;
1620 } else
1621 rv = Manage_remove(tst, fd, dv, sysfd,
1622 rdev, verbose, force,
1623 devname);
1624 if (sysfd >= 0)
1625 close(sysfd);
1626 sysfd = -1;
1627 if (rv < 0)
1628 goto abort;
1629 if (rv > 0)
1630 count++;
1631 break;
1632
1633 case 'f': /* set faulty */
1634 /* FIXME check current member */
1635 if ((sysfd >= 0 && write(sysfd, "faulty", 6) != 6) ||
1636 (sysfd < 0 && ioctl(fd, SET_DISK_FAULTY,
1637 rdev))) {
1638 if (errno == EBUSY)
1639 busy = 1;
1640 pr_err("set device faulty failed for %s: %s\n",
1641 dv->devname, strerror(errno));
1642 if (sysfd >= 0)
1643 close(sysfd);
1644 goto abort;
1645 }
1646 if (sysfd >= 0)
1647 close(sysfd);
1648 sysfd = -1;
1649 count++;
1650 if (verbose >= 0)
1651 pr_err("set %s faulty in %s\n",
1652 dv->devname, devname);
1653 break;
1654 case 'R': /* Mark as replaceable */
1655 if (subarray) {
1656 pr_err("Cannot replace disks in a \'member\' array, perform this operation on the parent container\n");
1657 rv = -1;
1658 } else {
1659 if (!frozen) {
1660 if (sysfs_freeze_array(&info) == 1)
1661 frozen = 1;
1662 else
1663 frozen = -1;
1664 }
1665 rv = Manage_replace(tst, fd, dv,
1666 rdev, verbose,
1667 devname);
1668 }
1669 if (rv < 0)
1670 goto abort;
1671 if (rv > 0)
1672 count++;
1673 break;
1674 case 'W': /* --with device that doesn't match */
1675 pr_err("No matching --replace device for --with %s\n",
1676 dv->devname);
1677 goto abort;
1678 case 'w': /* --with device which was matched */
1679 rv = Manage_with(tst, fd, dv,
1680 rdev, verbose, devname);
1681 if (rv < 0)
1682 goto abort;
1683 break;
1684 }
1685 }
1686 if (frozen > 0)
1687 sysfs_set_str(&info, NULL, "sync_action","idle");
1688 if (test && count == 0)
1689 return 2;
1690 return 0;
1691
1692 abort:
1693 if (frozen > 0)
1694 sysfs_set_str(&info, NULL, "sync_action","idle");
1695 return !test && busy ? 2 : 1;
1696 }
1697
1698 int autodetect(void)
1699 {
1700 /* Open any md device, and issue the RAID_AUTORUN ioctl */
1701 int rv = 1;
1702 int fd = dev_open("9:0", O_RDONLY);
1703 if (fd >= 0) {
1704 if (ioctl(fd, RAID_AUTORUN, 0) == 0)
1705 rv = 0;
1706 close(fd);
1707 }
1708 return rv;
1709 }
1710
1711 int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident *ident, int verbose)
1712 {
1713 struct supertype supertype, *st = &supertype;
1714 int fd, rv = 2;
1715
1716 memset(st, 0, sizeof(*st));
1717
1718 fd = open_subarray(dev, subarray, st, verbose < 0);
1719 if (fd < 0)
1720 return 2;
1721
1722 if (!st->ss->update_subarray) {
1723 if (verbose >= 0)
1724 pr_err("Operation not supported for %s metadata\n",
1725 st->ss->name);
1726 goto free_super;
1727 }
1728
1729 if (mdmon_running(st->devnm))
1730 st->update_tail = &st->updates;
1731
1732 rv = st->ss->update_subarray(st, subarray, update, ident);
1733
1734 if (rv) {
1735 if (verbose >= 0)
1736 pr_err("Failed to update %s of subarray-%s in %s\n",
1737 update, subarray, dev);
1738 } else if (st->update_tail)
1739 flush_metadata_updates(st);
1740 else
1741 st->ss->sync_metadata(st);
1742
1743 if (rv == 0 && strcmp(update, "name") == 0 && verbose >= 0)
1744 pr_err("Updated subarray-%s name from %s, UUIDs may have changed\n",
1745 subarray, dev);
1746
1747 free_super:
1748 st->ss->free_super(st);
1749 close(fd);
1750
1751 return rv;
1752 }
1753
1754 /* Move spare from one array to another If adding to destination array fails
1755 * add back to original array.
1756 * Returns 1 on success, 0 on failure */
1757 int move_spare(char *from_devname, char *to_devname, dev_t devid)
1758 {
1759 struct mddev_dev devlist;
1760 char devname[20];
1761
1762 /* try to remove and add */
1763 int fd1 = open(to_devname, O_RDONLY);
1764 int fd2 = open(from_devname, O_RDONLY);
1765
1766 if (fd1 < 0 || fd2 < 0) {
1767 if (fd1>=0) close(fd1);
1768 if (fd2>=0) close(fd2);
1769 return 0;
1770 }
1771
1772 devlist.next = NULL;
1773 devlist.used = 0;
1774 devlist.writemostly = FlagDefault;
1775 devlist.failfast = FlagDefault;
1776 devlist.devname = devname;
1777 sprintf(devname, "%d:%d", major(devid), minor(devid));
1778
1779 devlist.disposition = 'r';
1780 if (Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0) == 0) {
1781 devlist.disposition = 'a';
1782 if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0, NULL, 0) == 0) {
1783 /* make sure manager is aware of changes */
1784 ping_manager(to_devname);
1785 ping_manager(from_devname);
1786 close(fd1);
1787 close(fd2);
1788 return 1;
1789 }
1790 else Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0);
1791 }
1792 close(fd1);
1793 close(fd2);
1794 return 0;
1795 }