]> git.ipfire.org Git - thirdparty/mdadm.git/blob - Manage.c
915322667788734b433cdaa4b1903a6ba2fa0377
[thirdparty/mdadm.git] / Manage.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2001-2013 Neil Brown <neilb@suse.de>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@suse.de>
23 */
24
25 #include "mdadm.h"
26 #include "md_u.h"
27 #include "md_p.h"
28 #include "udev.h"
29 #include <ctype.h>
30
31 int Manage_ro(char *devname, int fd, int readonly)
32 {
33 /* switch to readonly or rw
34 *
35 * requires >= 0.90.0
36 * first check that array is runing
37 * use RESTART_ARRAY_RW or STOP_ARRAY_RO
38 *
39 */
40 struct mdinfo *mdi;
41 int rv = 0;
42
43 /* If this is an externally-managed array, we need to modify the
44 * metadata_version so that mdmon doesn't undo our change.
45 */
46 mdi = sysfs_read(fd, NULL, GET_LEVEL|GET_VERSION);
47 if (mdi &&
48 mdi->array.major_version == -1 &&
49 is_subarray(mdi->text_version)) {
50 char vers[64];
51 strcpy(vers, "external:");
52 strcat(vers, mdi->text_version);
53 if (readonly > 0) {
54 int rv;
55 /* We set readonly ourselves. */
56 vers[9] = '-';
57 sysfs_set_str(mdi, NULL, "metadata_version", vers);
58
59 close(fd);
60 rv = sysfs_set_str(mdi, NULL, "array_state", "readonly");
61
62 if (rv < 0) {
63 pr_err("failed to set readonly for %s: %s\n",
64 devname, strerror(errno));
65
66 vers[9] = mdi->text_version[0];
67 sysfs_set_str(mdi, NULL, "metadata_version", vers);
68 rv = 1;
69 goto out;
70 }
71 } else {
72 char *cp;
73 /* We cannot set read/write - must signal mdmon */
74 vers[9] = '/';
75 sysfs_set_str(mdi, NULL, "metadata_version", vers);
76
77 cp = strchr(vers+10, '/');
78 if (cp)
79 *cp = 0;
80 ping_monitor(vers+10);
81 if (mdi->array.level <= 0)
82 sysfs_set_str(mdi, NULL, "array_state", "active");
83 }
84 goto out;
85 }
86
87 if (!md_array_active(fd)) {
88 pr_err("%s does not appear to be active.\n", devname);
89 rv = 1;
90 goto out;
91 }
92
93 if (readonly > 0) {
94 if (ioctl(fd, STOP_ARRAY_RO, NULL)) {
95 pr_err("failed to set readonly for %s: %s\n",
96 devname, strerror(errno));
97 rv = 1;
98 goto out;
99 }
100 } else if (readonly < 0) {
101 if (ioctl(fd, RESTART_ARRAY_RW, NULL)) {
102 pr_err("failed to set writable for %s: %s\n",
103 devname, strerror(errno));
104 rv = 1;
105 goto out;
106 }
107 }
108 out:
109 sysfs_free(mdi);
110 return rv;
111 }
112
/*
 * remove_devices - remove symlinks at 'path' that point at the device.
 *
 * @devnm: kernel device name; the link target compared against is
 *         "/dev/<devnm>".
 * @path:  name of the link to remove; NULL or empty means nothing to do.
 *
 * Remove names at 'path' - possibly with partition suffixes (1..15) -
 * which link to the 'standard' name for devnm.  These were probably
 * created by mdadm when the array was assembled.  A name is unlinked
 * only when readlink() returns exactly the expected target.
 */
static void remove_devices(char *devnm, char *path)
{
	char base[40];
	char *path2;
	char link[1024];
	int n;
	int part;
	char *be;
	char *pe;

	/* An empty path has no last character to inspect below. */
	if (!path || !*path)
		return;

	snprintf(base, sizeof(base), "/dev/%s", devnm);
	be = base + strlen(base);

	/* Room for the path plus a partition suffix */
	path2 = xmalloc(strlen(path)+20);
	strcpy(path2, path);
	pe = path2 + strlen(path2);

	for (part = 0; part < 16; part++) {
		if (part) {
			snprintf(be, sizeof(base) - (size_t)(be - base),
				 "p%d", part);

			/* A name ending in a digit gets a 'p' separator
			 * before the partition number.
			 */
			if (isdigit((unsigned char)pe[-1]))
				sprintf(pe, "p%d", part);
			else
				sprintf(pe, "%d", part);
		}
		n = readlink(path2, link, sizeof(link));
		if (n > 0 && (int)strlen(base) == n &&
		    strncmp(link, base, n) == 0)
			unlink(path2);
	}
	free(path2);
}
155
/*
 * Manage_run - run an array that has already been configured.
 *
 * Looks up the device name for 'fd' and passes a private copy of it
 * to IncrementalScan().  Returns 1 if the name cannot be found,
 * otherwise whatever IncrementalScan() returns.
 */
int Manage_run(char *devname, int fd, struct context *c)
{
	char name_copy[32];
	char *found = fd2devnm(fd);

	if (found == NULL) {
		pr_err("Cannot find %s in sysfs!!\n", devname);
		return 1;
	}

	/* Work on a local copy of the name returned by fd2devnm() */
	strcpy(name_copy, found);

	return IncrementalScan(c, name_copy);
}
171
172 int Manage_stop(char *devname, int fd, int verbose, int will_retry)
173 {
174 /* Stop the array. Array must already be configured
175 * 'will_retry' means that error messages are not wanted.
176 */
177 int rv = 0;
178 struct map_ent *map = NULL;
179 struct mdinfo *mdi;
180 char devnm[32];
181 char container[32];
182 int err;
183 int count;
184 char buf[32];
185 unsigned long long rd1, rd2;
186
187 if (will_retry && verbose == 0)
188 verbose = -1;
189
190 strcpy(devnm, fd2devnm(fd));
191 /* Get EXCL access first. If this fails, then attempting
192 * to stop is probably a bad idea.
193 */
194 mdi = sysfs_read(fd, NULL, GET_LEVEL|GET_COMPONENT|GET_VERSION);
195 if (mdi && is_subarray(mdi->text_version)) {
196 char *sl;
197 strncpy(container, mdi->text_version+1, sizeof(container));
198 container[sizeof(container)-1] = 0;
199 sl = strchr(container, '/');
200 if (sl)
201 *sl = 0;
202 } else
203 container[0] = 0;
204 close(fd);
205 count = 5;
206 while (((fd = ((devname[0] == '/')
207 ?open(devname, O_RDONLY|O_EXCL)
208 :open_dev_flags(devnm, O_RDONLY|O_EXCL))) < 0 ||
209 strcmp(fd2devnm(fd), devnm) != 0) && container[0] &&
210 mdmon_running(container) && count) {
211 /* Can't open, so something might be wrong. However it
212 * is a container, so we might be racing with mdmon, so
213 * retry for a bit.
214 */
215 if (fd >= 0)
216 close(fd);
217 flush_mdmon(container);
218 count--;
219 }
220 if (fd < 0 || strcmp(fd2devnm(fd), devnm) != 0) {
221 if (fd >= 0)
222 close(fd);
223 if (verbose >= 0)
224 pr_err("Cannot get exclusive access to %s:Perhaps a running process, mounted filesystem or active volume group?\n",
225 devname);
226 sysfs_free(mdi);
227 return 1;
228 }
229 /* If this is an mdmon managed array, just write 'inactive'
230 * to the array state and let mdmon clear up.
231 */
232 if (mdi &&
233 mdi->array.level > 0 &&
234 is_subarray(mdi->text_version)) {
235 int err;
236 /* This is mdmon managed. */
237 close(fd);
238
239 /* As we had an O_EXCL open, any use of the device
240 * which blocks STOP_ARRAY is probably a transient use,
241 * so it is reasonable to retry for a while - 5 seconds.
242 */
243 count = 25;
244 while (count &&
245 (err = sysfs_set_str(mdi, NULL,
246 "array_state",
247 "inactive")) < 0 &&
248 errno == EBUSY) {
249 sleep_for(0, MSEC_TO_NSEC(200), true);
250 count--;
251 }
252 if (err) {
253 if (verbose >= 0)
254 pr_err("failed to stop array %s: %s\n",
255 devname, strerror(errno));
256 rv = 1;
257 goto out;
258 }
259
260 /* Give monitor a chance to act */
261 ping_monitor(mdi->text_version);
262
263 fd = open_dev_excl(devnm);
264 if (fd < 0) {
265 if (verbose >= 0)
266 pr_err("failed to completely stop %s: Device is busy\n",
267 devname);
268 rv = 1;
269 goto out;
270 }
271 } else if (mdi &&
272 mdi->array.major_version == -1 &&
273 mdi->array.minor_version == -2 &&
274 !is_subarray(mdi->text_version)) {
275 struct mdstat_ent *mds, *m;
276 /* container, possibly mdmon-managed.
277 * Make sure mdmon isn't opening it, which
278 * would interfere with the 'stop'
279 */
280 ping_monitor(mdi->sys_name);
281
282 /* now check that there are no existing arrays
283 * which are members of this array
284 */
285 mds = mdstat_read(0, 0);
286 for (m = mds; m; m = m->next)
287 if (m->metadata_version &&
288 strncmp(m->metadata_version, "external:", 9)==0 &&
289 metadata_container_matches(m->metadata_version+9,
290 devnm)) {
291 if (verbose >= 0)
292 pr_err("Cannot stop container %s: member %s still active\n",
293 devname, m->devnm);
294 free_mdstat(mds);
295 rv = 1;
296 goto out;
297 }
298 }
299
300 /* If the array is undergoing a reshape which changes the number
301 * of devices, then it would be nice to stop it at a point where
302 * it has completed a full number of stripes in both old and
303 * new layouts as this will allow the reshape to be reverted.
304 * So if 'sync_action' is "reshape" and 'raid_disks' shows two
305 * different numbers, then
306 * - freeze reshape
307 * - set sync_max to next multiple of both data_disks and
308 * chunk sizes (or next but one)
309 * - unfreeze reshape
310 * - wait on 'sync_completed' for that point to be reached.
311 */
312 if (mdi && is_level456(mdi->array.level) &&
313 sysfs_attribute_available(mdi, NULL, "sync_action") &&
314 sysfs_attribute_available(mdi, NULL, "reshape_direction") &&
315 sysfs_get_str(mdi, NULL, "sync_action", buf, 20) > 0 &&
316 strcmp(buf, "reshape\n") == 0 &&
317 sysfs_get_two(mdi, NULL, "raid_disks", &rd1, &rd2) == 2) {
318 unsigned long long position, curr;
319 unsigned long long chunk1, chunk2;
320 unsigned long long rddiv, chunkdiv;
321 unsigned long long sectors;
322 unsigned long long sync_max, old_sync_max;
323 unsigned long long completed;
324 int backwards = 0;
325 int delay;
326 int scfd;
327
328 delay = 40;
329 while (rd1 > rd2 && delay > 0 &&
330 sysfs_get_ll(mdi, NULL, "sync_max", &old_sync_max) == 0) {
331 /* must be in the critical section - wait a bit */
332 delay -= 1;
333 sleep_for(0, MSEC_TO_NSEC(100), true);
334 }
335
336 if (sysfs_set_str(mdi, NULL, "sync_action", "frozen") != 0)
337 goto done;
338 /* Array is frozen */
339
340 rd1 -= mdi->array.level == 6 ? 2 : 1;
341 rd2 -= mdi->array.level == 6 ? 2 : 1;
342 sysfs_get_str(mdi, NULL, "reshape_direction", buf, sizeof(buf));
343 if (strncmp(buf, "back", 4) == 0)
344 backwards = 1;
345 if (sysfs_get_ll(mdi, NULL, "reshape_position", &position) != 0) {
346 /* reshape must have finished now */
347 sysfs_set_str(mdi, NULL, "sync_action", "idle");
348 goto done;
349 }
350 sysfs_get_two(mdi, NULL, "chunk_size", &chunk1, &chunk2);
351 chunk1 /= 512;
352 chunk2 /= 512;
353 rddiv = GCD(rd1, rd2);
354 chunkdiv = GCD(chunk1, chunk2);
355 sectors = (chunk1/chunkdiv) * chunk2 * (rd1/rddiv) * rd2;
356
357 if (backwards) {
358 /* Need to subtract 'reshape_position' from
359 * array size to get equivalent of sync_max.
360 * Size calculation based on raid5_size in kernel.
361 */
362 unsigned long long size = mdi->component_size;
363 size &= ~(chunk1-1);
364 size &= ~(chunk2-1);
365 /* rd1 must be smaller */
366 /* Reshape may have progressed further backwards than
367 * recorded, so target even further back (hence "-1")
368 */
369 position = (position / sectors - 1) * sectors;
370 /* rd1 is always the conversion factor between 'sync'
371 * position and 'reshape' position.
372 * We read 1 "new" stripe worth of data from where-ever,
373 * and when write out that full stripe.
374 */
375 sync_max = size - position/rd1;
376 } else {
377 /* Reshape will very likely be beyond position, and it may
378 * be too late to stop at '+1', so aim for '+2'
379 */
380 position = (position / sectors + 2) * sectors;
381 sync_max = position/rd1;
382 }
383 if (sysfs_get_ll(mdi, NULL, "sync_max", &old_sync_max) < 0)
384 old_sync_max = mdi->component_size;
385 /* Must not advance sync_max as that could confuse
386 * the reshape monitor */
387 if (sync_max < old_sync_max)
388 sysfs_set_num(mdi, NULL, "sync_max", sync_max);
389 sysfs_set_str(mdi, NULL, "sync_action", "idle");
390
391 /* That should have set things going again. Now we
392 * wait a little while (3 second max) for sync_completed
393 * to reach the target.
394 * The reshape process can block for 500msec if
395 * the sync speed limit is hit, so we need to wait
396 * a lot longer than that. 1 second is usually
397 * enough. 3 is safe.
398 */
399 delay = 3000;
400 scfd = sysfs_open(mdi->sys_name, NULL, "sync_completed");
401 while (scfd >= 0 && delay > 0 && old_sync_max > 0) {
402 unsigned long long max_completed;
403 sysfs_get_ll(mdi, NULL, "reshape_position", &curr);
404 sysfs_fd_get_str(scfd, buf, sizeof(buf));
405 if (strncmp(buf, "none", 4) == 0) {
406 /* Either reshape has aborted, or hasn't
407 * quite started yet. Wait a bit and
408 * check 'sync_action' to see.
409 */
410 sleep_for(0, MSEC_TO_NSEC(10), true);
411 sysfs_get_str(mdi, NULL, "sync_action", buf, sizeof(buf));
412 if (strncmp(buf, "reshape", 7) != 0)
413 break;
414 }
415
416 if (sysfs_fd_get_two(scfd, &completed,
417 &max_completed) == 2 &&
418 /* 'completed' sometimes reads as max-uulong */
419 completed < max_completed &&
420 (completed > sync_max ||
421 (completed == sync_max && curr != position))) {
422 while (completed > sync_max) {
423 sync_max += sectors / rd1;
424 if (backwards)
425 position -= sectors;
426 else
427 position += sectors;
428 }
429 if (sync_max < old_sync_max)
430 sysfs_set_num(mdi, NULL, "sync_max", sync_max);
431 }
432
433 if (!backwards && curr >= position)
434 break;
435 if (backwards && curr <= position)
436 break;
437 sysfs_wait(scfd, &delay);
438 }
439 if (scfd >= 0)
440 close(scfd);
441
442 }
443 done:
444
445 /* As we have an O_EXCL open, any use of the device
446 * which blocks STOP_ARRAY is probably a transient use,
447 * so it is reasonable to retry for a while - 5 seconds.
448 */
449 count = 25; err = 0;
450 while (count && fd >= 0 &&
451 (err = ioctl(fd, STOP_ARRAY, NULL)) < 0 && errno == EBUSY) {
452 sleep_for(0, MSEC_TO_NSEC(200), true);
453 count --;
454 }
455 if (fd >= 0 && err) {
456 if (verbose >= 0) {
457 pr_err("failed to stop array %s: %s\n",
458 devname, strerror(errno));
459 if (errno == EBUSY)
460 cont_err("Perhaps a running process, mounted filesystem or active volume group?\n");
461 }
462 rv = 1;
463 goto out;
464 }
465
466 if (devnm[0] && udev_is_available()) {
467 struct map_ent *mp = map_by_devnm(&map, devnm);
468 remove_devices(devnm, mp ? mp->path : NULL);
469 }
470
471 if (verbose >= 0)
472 pr_err("stopped %s\n", devname);
473 map_lock(&map);
474 map_remove(&map, devnm);
475 map_unlock(&map);
476 out:
477 sysfs_free(mdi);
478
479 return rv;
480 }
481
482 static struct mddev_dev *add_one(struct mddev_dev *dv, char *name, char disp)
483 {
484 struct mddev_dev *new;
485 new = xmalloc(sizeof(*new));
486 memset(new, 0, sizeof(*new));
487 new->devname = xstrdup(name);
488 new->disposition = disp;
489 new->next = dv->next;
490 dv->next = new;
491 return new;
492 }
493
494 static void add_faulty(struct mddev_dev *dv, int fd, char disp)
495 {
496 mdu_array_info_t array;
497 mdu_disk_info_t disk;
498 int remaining_disks;
499 int i;
500
501 if (md_get_array_info(fd, &array) != 0)
502 return;
503
504 remaining_disks = array.nr_disks;
505 for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
506 char buf[40];
507 disk.number = i;
508 if (md_get_disk_info(fd, &disk) != 0)
509 continue;
510 if (disk.major == 0 && disk.minor == 0)
511 continue;
512 remaining_disks--;
513 if ((disk.state & 1) == 0) /* not faulty */
514 continue;
515 sprintf(buf, "%d:%d", disk.major, disk.minor);
516 dv = add_one(dv, buf, disp);
517 }
518 }
519
520 static void add_detached(struct mddev_dev *dv, int fd, char disp)
521 {
522 mdu_array_info_t array;
523 mdu_disk_info_t disk;
524 int remaining_disks;
525 int i;
526
527 if (md_get_array_info(fd, &array) != 0)
528 return;
529
530 remaining_disks = array.nr_disks;
531 for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
532 char buf[40];
533 int sfd;
534 disk.number = i;
535 if (md_get_disk_info(fd, &disk) != 0)
536 continue;
537 if (disk.major == 0 && disk.minor == 0)
538 continue;
539 remaining_disks--;
540 if (disp == 'f' && (disk.state & 1) != 0) /* already faulty */
541 continue;
542 sprintf(buf, "%d:%d", disk.major, disk.minor);
543 sfd = dev_open(buf, O_RDONLY);
544 if (sfd >= 0) {
545 /* Not detached */
546 close(sfd);
547 continue;
548 }
549 if (errno != ENXIO)
550 /* Probably not detached */
551 continue;
552 dv = add_one(dv, buf, disp);
553 }
554 }
555
556 static void add_set(struct mddev_dev *dv, int fd, char set_char)
557 {
558 mdu_array_info_t array;
559 mdu_disk_info_t disk;
560 int remaining_disks;
561 int copies, set;
562 int i;
563
564 if (md_get_array_info(fd, &array) != 0)
565 return;
566 if (array.level != 10)
567 return;
568 copies = ((array.layout & 0xff) *
569 ((array.layout >> 8) & 0xff));
570 if (array.raid_disks % copies)
571 return;
572
573 remaining_disks = array.nr_disks;
574 for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
575 char buf[40];
576 disk.number = i;
577 if (md_get_disk_info(fd, &disk) != 0)
578 continue;
579 if (disk.major == 0 && disk.minor == 0)
580 continue;
581 remaining_disks--;
582 set = disk.raid_disk % copies;
583 if (set_char != set + 'A')
584 continue;
585 sprintf(buf, "%d:%d", disk.major, disk.minor);
586 dv = add_one(dv, buf, dv->disposition);
587 }
588 }
589
590 int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
591 struct supertype *dev_st, struct supertype *tst,
592 unsigned long rdev, enum update_opt update,
593 char *devname, int verbose, mdu_array_info_t *array)
594 {
595 struct mdinfo mdi;
596 int duuid[4];
597 int ouuid[4];
598
599 dev_st->ss->getinfo_super(dev_st, &mdi, NULL);
600 dev_st->ss->uuid_from_super(dev_st, ouuid);
601 if (tst->sb)
602 tst->ss->uuid_from_super(tst, duuid);
603 else
604 /* Assume uuid matches: kernel will check */
605 memcpy(duuid, ouuid, sizeof(ouuid));
606 if ((mdi.disk.state & (1<<MD_DISK_ACTIVE)) &&
607 !(mdi.disk.state & (1<<MD_DISK_FAULTY)) &&
608 memcmp(duuid, ouuid, sizeof(ouuid))==0) {
609 /* Looks like it is worth a
610 * try. Need to make sure
611 * kernel will accept it
612 * though.
613 */
614 mdu_disk_info_t disc;
615 disc.number = mdi.disk.number;
616 if (md_get_disk_info(fd, &disc) != 0 ||
617 disc.major != 0 || disc.minor != 0)
618 goto skip_re_add;
619 disc.major = major(rdev);
620 disc.minor = minor(rdev);
621 disc.number = mdi.disk.number;
622 disc.raid_disk = mdi.disk.raid_disk;
623 disc.state = mdi.disk.state;
624 if (array->state & (1 << MD_SB_CLUSTERED)) {
625 /* extra flags are needed when adding to a cluster as
626 * there are two cases to distinguish
627 */
628 if (dv->disposition == 'c')
629 disc.state |= (1 << MD_DISK_CANDIDATE);
630 else
631 disc.state |= (1 << MD_DISK_CLUSTER_ADD);
632 }
633 if (dv->writemostly == FlagSet)
634 disc.state |= 1 << MD_DISK_WRITEMOSTLY;
635 if (dv->writemostly == FlagClear)
636 disc.state &= ~(1 << MD_DISK_WRITEMOSTLY);
637 if (dv->failfast == FlagSet)
638 disc.state |= 1 << MD_DISK_FAILFAST;
639 if (dv->failfast == FlagClear)
640 disc.state &= ~(1 << MD_DISK_FAILFAST);
641 remove_partitions(tfd);
642 if (update || dv->writemostly != FlagDefault ||
643 dv->failfast != FlagDefault) {
644 int rv = -1;
645 tfd = dev_open(dv->devname, O_RDWR);
646 if (tfd < 0) {
647 pr_err("failed to open %s for superblock update during re-add\n", dv->devname);
648 return -1;
649 }
650
651 if (dv->writemostly == FlagSet)
652 rv = dev_st->ss->update_super(
653 dev_st, NULL, UOPT_SPEC_WRITEMOSTLY,
654 devname, verbose, 0, NULL);
655 if (dv->writemostly == FlagClear)
656 rv = dev_st->ss->update_super(
657 dev_st, NULL, UOPT_SPEC_READWRITE,
658 devname, verbose, 0, NULL);
659 if (dv->failfast == FlagSet)
660 rv = dev_st->ss->update_super(
661 dev_st, NULL, UOPT_SPEC_FAILFAST,
662 devname, verbose, 0, NULL);
663 if (dv->failfast == FlagClear)
664 rv = dev_st->ss->update_super(
665 dev_st, NULL, UOPT_SPEC_NOFAILFAST,
666 devname, verbose, 0, NULL);
667 if (update)
668 rv = dev_st->ss->update_super(
669 dev_st, NULL, update,
670 devname, verbose, 0, NULL);
671 if (rv == 0)
672 rv = dev_st->ss->store_super(dev_st, tfd);
673 close(tfd);
674 if (rv != 0) {
675 pr_err("failed to update superblock during re-add\n");
676 return -1;
677 }
678 }
679 /* don't even try if disk is marked as faulty */
680 errno = 0;
681 if (ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
682 if (verbose >= 0)
683 pr_err("re-added %s\n", dv->devname);
684 return 1;
685 }
686 if (errno == ENOMEM || errno == EROFS) {
687 pr_err("add new device failed for %s: %s\n",
688 dv->devname, strerror(errno));
689 if (dv->disposition == 'M')
690 return 0;
691 return -1;
692 }
693 }
694 skip_re_add:
695 return 0;
696 }
697
698 int Manage_add(int fd, int tfd, struct mddev_dev *dv,
699 struct supertype *tst, mdu_array_info_t *array,
700 int force, int verbose, char *devname,
701 enum update_opt update, unsigned long rdev,
702 unsigned long long array_size, int raid_slot)
703 {
704 unsigned long long ldsize;
705 struct supertype *dev_st;
706 int j;
707 mdu_disk_info_t disc;
708 struct map_ent *map = NULL;
709
710 if (!get_dev_size(tfd, dv->devname, &ldsize)) {
711 if (dv->disposition == 'M')
712 return 0;
713 else
714 return -1;
715 }
716
717 if (tst->ss == &super0 && ldsize > 4ULL*1024*1024*1024*1024) {
718 /* More than 4TB is wasted on v0.90 */
719 if (!force) {
720 pr_err("%s is larger than %s can effectively use.\n"
721 " Add --force is you really want to add this device.\n",
722 dv->devname, devname);
723 return -1;
724 }
725 pr_err("%s is larger than %s can effectively use.\n"
726 " Adding anyway as --force was given.\n",
727 dv->devname, devname);
728 }
729
730 if (array->not_persistent == 0 || tst->ss->external) {
731
732 /* need to find a sample superblock to copy, and
733 * a spare slot to use.
734 * For 'external' array (well, container based),
735 * We can just load the metadata for the array->
736 */
737 int array_failed;
738 if (tst->sb)
739 /* already loaded */;
740 else if (tst->ss->external) {
741 tst->ss->load_container(tst, fd, NULL);
742 } else for (j = 0; j < tst->max_devs; j++) {
743 char *dev;
744 int dfd;
745 disc.number = j;
746 if (md_get_disk_info(fd, &disc))
747 continue;
748 if (disc.major==0 && disc.minor==0)
749 continue;
750 if ((disc.state & 4)==0) /* sync */
751 continue;
752 /* Looks like a good device to try */
753 dev = map_dev(disc.major, disc.minor, 1);
754 if (!dev)
755 continue;
756 dfd = dev_open(dev, O_RDONLY);
757 if (dfd < 0)
758 continue;
759 if (tst->ss->load_super(tst, dfd,
760 NULL)) {
761 close(dfd);
762 continue;
763 }
764 close(dfd);
765 break;
766 }
767 /* FIXME this is a bad test to be using */
768 if (!tst->sb && (dv->disposition != 'a' &&
769 dv->disposition != 'S')) {
770 /* we are re-adding a device to a
771 * completely dead array - have to depend
772 * on kernel to check
773 */
774 } else if (!tst->sb) {
775 pr_err("cannot load array metadata from %s\n", devname);
776 return -1;
777 }
778
779 /* Make sure device is large enough */
780 if (dv->disposition != 'j' && /* skip size check for Journal */
781 tst->sb &&
782 tst->ss->avail_size(tst, ldsize/512, INVALID_SECTORS) <
783 array_size) {
784 if (dv->disposition == 'M')
785 return 0;
786 pr_err("%s not large enough to join array\n",
787 dv->devname);
788 return -1;
789 }
790
791 /* Possibly this device was recently part of
792 * the array and was temporarily removed, and
793 * is now being re-added. If so, we can
794 * simply re-add it.
795 */
796
797 if (array->not_persistent == 0) {
798 dev_st = dup_super(tst);
799 dev_st->ss->load_super(dev_st, tfd, NULL);
800 if (dev_st->sb && dv->disposition != 'S') {
801 int rv;
802
803 rv = attempt_re_add(fd, tfd, dv, dev_st, tst,
804 rdev, update, devname,
805 verbose, array);
806 dev_st->ss->free_super(dev_st);
807 if (rv) {
808 free(dev_st);
809 return rv;
810 }
811 }
812 if (dev_st) {
813 dev_st->ss->free_super(dev_st);
814 free(dev_st);
815 }
816 }
817 if (dv->disposition == 'M') {
818 if (verbose > 0)
819 pr_err("--re-add for %s to %s is not possible\n",
820 dv->devname, devname);
821 return 0;
822 }
823 if (dv->disposition == 'A') {
824 pr_err("--re-add for %s to %s is not possible\n",
825 dv->devname, devname);
826 return -1;
827 }
828 if (array->active_disks < array->raid_disks) {
829 char *avail = xcalloc(array->raid_disks, 1);
830 int d;
831 int found = 0;
832
833 for (d = 0; d < MAX_DISKS && found < array->nr_disks; d++) {
834 disc.number = d;
835 if (md_get_disk_info(fd, &disc))
836 continue;
837 if (disc.major == 0 && disc.minor == 0)
838 continue;
839 if (!(disc.state & (1<<MD_DISK_SYNC)))
840 continue;
841 avail[disc.raid_disk] = 1;
842 found++;
843 }
844 array_failed = !enough(array->level, array->raid_disks,
845 array->layout, 1, avail);
846 free(avail);
847 } else
848 array_failed = 0;
849 if (array_failed) {
850 pr_err("%s has failed so using --add cannot work and might destroy\n",
851 devname);
852 pr_err("data on %s. You should stop the array and re-assemble it.\n",
853 dv->devname);
854 return -1;
855 }
856 } else {
857 /* non-persistent. Must ensure that new drive
858 * is at least array->size big.
859 */
860 if (ldsize/512 < array_size) {
861 pr_err("%s not large enough to join array\n",
862 dv->devname);
863 return -1;
864 }
865 }
866 /* committed to really trying this device now*/
867 remove_partitions(tfd);
868
869 /* in 2.6.17 and earlier, version-1 superblocks won't
870 * use the number we write, but will choose a free number.
871 * we must choose the same free number, which requires
872 * starting at 'raid_disks' and counting up
873 */
874 for (j = array->raid_disks; j < tst->max_devs; j++) {
875 disc.number = j;
876 if (md_get_disk_info(fd, &disc))
877 break;
878 if (disc.major==0 && disc.minor==0)
879 break;
880 if (disc.state & 8) /* removed */
881 break;
882 }
883 disc.major = major(rdev);
884 disc.minor = minor(rdev);
885 if (raid_slot < 0)
886 disc.number = j;
887 else
888 disc.number = raid_slot;
889 disc.state = 0;
890
891 /* only add journal to array that supports journaling */
892 if (dv->disposition == 'j') {
893 struct mdinfo *mdp;
894
895 mdp = sysfs_read(fd, NULL, GET_ARRAY_STATE);
896 if (!mdp) {
897 pr_err("%s unable to read array state.\n", devname);
898 return -1;
899 }
900
901 if (mdp->array_state != ARRAY_READONLY) {
902 sysfs_free(mdp);
903 pr_err("%s is not readonly, cannot add journal.\n", devname);
904 return -1;
905 }
906
907 sysfs_free(mdp);
908
909 disc.raid_disk = 0;
910 }
911
912 if (map_lock(&map))
913 pr_err("failed to get exclusive lock on mapfile when add disk\n");
914
915 if (array->not_persistent==0) {
916 int dfd;
917 if (dv->disposition == 'j')
918 disc.state |= (1 << MD_DISK_JOURNAL) | (1 << MD_DISK_SYNC);
919 if (dv->writemostly == FlagSet)
920 disc.state |= 1 << MD_DISK_WRITEMOSTLY;
921 if (dv->failfast == FlagSet)
922 disc.state |= 1 << MD_DISK_FAILFAST;
923 dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
924 if (tst->ss->add_to_super(tst, &disc, dfd,
925 dv->devname, INVALID_SECTORS))
926 goto unlock;
927 if (tst->ss->write_init_super(tst))
928 goto unlock;
929 } else if (dv->disposition == 'A') {
930 /* this had better be raid1.
931 * As we are "--re-add"ing we must find a spare slot
932 * to fill.
933 */
934 char *used = xcalloc(array->raid_disks, 1);
935 for (j = 0; j < tst->max_devs; j++) {
936 mdu_disk_info_t disc2;
937 disc2.number = j;
938 if (md_get_disk_info(fd, &disc2))
939 continue;
940 if (disc2.major==0 && disc2.minor==0)
941 continue;
942 if (disc2.state & 8) /* removed */
943 continue;
944 if (disc2.raid_disk < 0)
945 continue;
946 if (disc2.raid_disk > array->raid_disks)
947 continue;
948 used[disc2.raid_disk] = 1;
949 }
950 for (j = 0 ; j < array->raid_disks; j++)
951 if (!used[j]) {
952 disc.raid_disk = j;
953 disc.state |= (1<<MD_DISK_SYNC);
954 break;
955 }
956 free(used);
957 }
958
959 if (array->state & (1 << MD_SB_CLUSTERED)) {
960 if (dv->disposition == 'c')
961 disc.state |= (1 << MD_DISK_CANDIDATE);
962 else
963 disc.state |= (1 << MD_DISK_CLUSTER_ADD);
964 }
965
966 if (dv->writemostly == FlagSet)
967 disc.state |= (1 << MD_DISK_WRITEMOSTLY);
968 if (dv->failfast == FlagSet)
969 disc.state |= (1 << MD_DISK_FAILFAST);
970 if (tst->ss->external) {
971 /* add a disk
972 * to an external metadata container */
973 struct mdinfo new_mdi;
974 struct mdinfo *sra;
975 int container_fd;
976 char devnm[32];
977 int dfd;
978
979 strcpy(devnm, fd2devnm(fd));
980
981 container_fd = open_dev_excl(devnm);
982 if (container_fd < 0) {
983 pr_err("add failed for %s: could not get exclusive access to container\n",
984 dv->devname);
985 tst->ss->free_super(tst);
986 goto unlock;
987 }
988
989 /* Check if metadata handler is able to accept the drive */
990 if (!tst->ss->validate_geometry(tst, LEVEL_CONTAINER, 0, 1, NULL,
991 0, 0, dv->devname, NULL, 0, 1)) {
992 close(container_fd);
993 goto unlock;
994 }
995
996 Kill(dv->devname, NULL, 0, -1, 0);
997 dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
998 if (tst->ss->add_to_super(tst, &disc, dfd,
999 dv->devname, INVALID_SECTORS)) {
1000 close(dfd);
1001 close(container_fd);
1002 goto unlock;
1003 }
1004 if (!mdmon_running(tst->container_devnm))
1005 tst->ss->sync_metadata(tst);
1006
1007 sra = sysfs_read(container_fd, NULL, 0);
1008 if (!sra) {
1009 pr_err("add failed for %s: sysfs_read failed\n",
1010 dv->devname);
1011 close(container_fd);
1012 tst->ss->free_super(tst);
1013 goto unlock;
1014 }
1015 sra->array.level = LEVEL_CONTAINER;
1016 /* Need to set data_offset and component_size */
1017 tst->ss->getinfo_super(tst, &new_mdi, NULL);
1018 new_mdi.disk.major = disc.major;
1019 new_mdi.disk.minor = disc.minor;
1020 new_mdi.recovery_start = 0;
1021 /* Make sure fds are closed as they are O_EXCL which
1022 * would block add_disk */
1023 tst->ss->free_super(tst);
1024 if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
1025 pr_err("add new device to external metadata failed for %s\n", dv->devname);
1026 close(container_fd);
1027 sysfs_free(sra);
1028 goto unlock;
1029 }
1030 ping_monitor(devnm);
1031 sysfs_free(sra);
1032 close(container_fd);
1033 } else {
1034 tst->ss->free_super(tst);
1035 if (ioctl(fd, ADD_NEW_DISK, &disc)) {
1036 if (dv->disposition == 'j')
1037 pr_err("Failed to hot add %s as journal, "
1038 "please try restart %s.\n", dv->devname, devname);
1039 else
1040 pr_err("add new device failed for %s as %d: %s\n",
1041 dv->devname, j, strerror(errno));
1042 goto unlock;
1043 }
1044 if (dv->disposition == 'j') {
1045 pr_err("Journal added successfully, making %s read-write\n", devname);
1046 if (Manage_ro(devname, fd, -1))
1047 pr_err("Failed to make %s read-write\n", devname);
1048 }
1049
1050 }
1051 if (verbose >= 0)
1052 pr_err("added %s\n", dv->devname);
1053 map_unlock(&map);
1054 return 1;
1055 unlock:
1056 map_unlock(&map);
1057 return -1;
1058 }
1059
1060 int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
1061 int sysfd, unsigned long rdev, int force, int verbose, char *devname)
1062 {
1063 int lfd = -1;
1064 int err;
1065
1066 if (tst->ss->external) {
1067 /* To remove a device from a container, we must
1068 * check that it isn't in use in an array.
1069 * This involves looking in the 'holders'
1070 * directory - there must be just one entry,
1071 * the container.
1072 * To ensure that it doesn't get used as a
1073 * hot spare while we are checking, we
1074 * get an O_EXCL open on the container
1075 */
1076 int ret;
1077 char devnm[32];
1078 strcpy(devnm, fd2devnm(fd));
1079 lfd = open_dev_excl(devnm);
1080 if (lfd < 0) {
1081 pr_err("Cannot get exclusive access to container - odd\n");
1082 return -1;
1083 }
1084 /* We may not be able to check on holders in
1085 * sysfs, either because we don't have the dev num
1086 * (rdev == 0) or because the device has been detached
1087 * and the 'holders' directory no longer exists
1088 * (ret == -1). In that case, assume it is OK to
1089 * remove.
1090 */
1091 if (rdev == 0)
1092 ret = -1;
1093 else {
1094 /*
1095 * The drive has already been set to 'faulty', however
1096 * monitor might not have had time to process it and the
1097 * drive might still have an entry in the 'holders'
1098 * directory. Try a few times to avoid a false error
1099 */
1100 int count = 20;
1101
1102 do {
1103 ret = sysfs_unique_holder(devnm, rdev);
1104 if (ret < 2)
1105 break;
1106 sleep_for(0, MSEC_TO_NSEC(100), true);
1107 } while (--count > 0);
1108
1109 if (ret == 0) {
1110 pr_err("%s is not a member, cannot remove.\n",
1111 dv->devname);
1112 close(lfd);
1113 return -1;
1114 }
1115 if (ret >= 2) {
1116 pr_err("%s is still in use, cannot remove.\n",
1117 dv->devname);
1118 close(lfd);
1119 return -1;
1120 }
1121 }
1122 }
1123 /* FIXME check that it is a current member */
1124 if (sysfd >= 0) {
1125 /* device has been removed and we don't know
1126 * the major:minor number
1127 */
1128 err = sys_hot_remove_disk(sysfd, force);
1129 } else {
1130 err = hot_remove_disk(fd, rdev, force);
1131 if (err && errno == ENODEV) {
1132 /* Old kernels rejected this if no personality
1133 * is registered */
1134 struct mdinfo *sra = sysfs_read(fd, NULL, GET_DEVS);
1135 struct mdinfo *dv = NULL;
1136 if (sra)
1137 dv = sra->devs;
1138 for ( ; dv ; dv=dv->next)
1139 if (dv->disk.major == (int)major(rdev) &&
1140 dv->disk.minor == (int)minor(rdev))
1141 break;
1142 if (dv)
1143 err = sysfs_set_str(sra, dv,
1144 "state", "remove");
1145 else
1146 err = -1;
1147 sysfs_free(sra);
1148 }
1149 }
1150 if (err) {
1151 pr_err("hot remove failed for %s: %s\n", dv->devname,
1152 strerror(errno));
1153 if (lfd >= 0)
1154 close(lfd);
1155 return -1;
1156 }
1157 if (tst->ss->external) {
1158 /*
1159 * Before dropping our exclusive open we make an
1160 * attempt at preventing mdmon from seeing an
1161 * 'add' event before reconciling this 'remove'
1162 * event.
1163 */
1164 char *devnm = fd2devnm(fd);
1165
1166 if (!devnm) {
1167 pr_err("unable to get container name\n");
1168 return -1;
1169 }
1170
1171 ping_manager(devnm);
1172 }
1173 if (lfd >= 0)
1174 close(lfd);
1175 if (verbose >= 0)
1176 pr_err("hot removed %s from %s\n",
1177 dv->devname, devname);
1178 return 1;
1179 }
1180
1181 int Manage_replace(struct supertype *tst, int fd, struct mddev_dev *dv,
1182 unsigned long rdev, int verbose, char *devname)
1183 {
1184 struct mdinfo *mdi, *di;
1185 if (tst->ss->external) {
1186 pr_err("--replace only supported for native metadata (0.90 or 1.x)\n");
1187 return -1;
1188 }
1189 /* Need to find the device in sysfs and add 'want_replacement' to the
1190 * status.
1191 */
1192 mdi = sysfs_read(fd, NULL, GET_DEVS);
1193 if (!mdi || !mdi->devs) {
1194 pr_err("Cannot find status of %s to enable replacement - strange\n",
1195 devname);
1196 return -1;
1197 }
1198 for (di = mdi->devs; di; di = di->next)
1199 if (di->disk.major == (int)major(rdev) &&
1200 di->disk.minor == (int)minor(rdev))
1201 break;
1202 if (di) {
1203 int rv;
1204 if (di->disk.raid_disk < 0) {
1205 pr_err("%s is not active and so cannot be replaced.\n",
1206 dv->devname);
1207 sysfs_free(mdi);
1208 return -1;
1209 }
1210 rv = sysfs_set_str(mdi, di,
1211 "state", "want_replacement");
1212 if (rv) {
1213 sysfs_free(mdi);
1214 pr_err("Failed to request replacement for %s\n",
1215 dv->devname);
1216 return -1;
1217 }
1218 if (verbose >= 0)
1219 pr_err("Marked %s (device %d in %s) for replacement\n",
1220 dv->devname, di->disk.raid_disk, devname);
1221 /* If there is a matching 'with', we need to tell it which
1222 * raid disk
1223 */
1224 while (dv && dv->disposition != 'W')
1225 dv = dv->next;
1226 if (dv) {
1227 dv->disposition = 'w';
1228 dv->used = di->disk.raid_disk;
1229 }
1230 return 1;
1231 }
1232 sysfs_free(mdi);
1233 pr_err("%s not found in %s so cannot --replace it\n",
1234 dv->devname, devname);
1235 return -1;
1236 }
1237
1238 int Manage_with(struct supertype *tst, int fd, struct mddev_dev *dv,
1239 unsigned long rdev, int verbose, char *devname)
1240 {
1241 struct mdinfo *mdi, *di;
1242 /* try to set 'slot' for 'rdev' in 'fd' to 'dv->used' */
1243 mdi = sysfs_read(fd, NULL, GET_DEVS|GET_STATE);
1244 if (!mdi || !mdi->devs) {
1245 pr_err("Cannot find status of %s to enable replacement - strange\n",
1246 devname);
1247 return -1;
1248 }
1249 for (di = mdi->devs; di; di = di->next)
1250 if (di->disk.major == (int)major(rdev) &&
1251 di->disk.minor == (int)minor(rdev))
1252 break;
1253 if (di) {
1254 int rv;
1255 if (di->disk.state & (1<<MD_DISK_FAULTY)) {
1256 pr_err("%s is faulty and cannot be a replacement\n",
1257 dv->devname);
1258 sysfs_free(mdi);
1259 return -1;
1260 }
1261 if (di->disk.raid_disk >= 0) {
1262 pr_err("%s is active and cannot be a replacement\n",
1263 dv->devname);
1264 sysfs_free(mdi);
1265 return -1;
1266 }
1267 rv = sysfs_set_num(mdi, di,
1268 "slot", dv->used);
1269 if (rv) {
1270 sysfs_free(mdi);
1271 pr_err("Failed to set %s as preferred replacement.\n",
1272 dv->devname);
1273 return -1;
1274 }
1275 if (verbose >= 0)
1276 pr_err("Marked %s in %s as replacement for device %d\n",
1277 dv->devname, devname, dv->used);
1278 return 1;
1279 }
1280 sysfs_free(mdi);
1281 pr_err("%s not found in %s so cannot make it preferred replacement\n",
1282 dv->devname, devname);
1283 return -1;
1284 }
1285
1286 /**
1287 * is_remove_safe() - Check if remove is safe.
1288 * @array: Array info.
1289 * @fd: Array file descriptor.
1290 * @devname: Name of device to remove.
1291 * @verbose: Verbose.
1292 *
1293 * The function determines if array will be operational
1294 * after removing &devname.
1295 *
1296 * Return: True if array will be operational, false otherwise.
1297 */
1298 bool is_remove_safe(mdu_array_info_t *array, const int fd, char *devname, const int verbose)
1299 {
1300 dev_t devid = devnm2devid(devname + 5);
1301 struct mdinfo *mdi = sysfs_read(fd, NULL, GET_DEVS | GET_DISKS | GET_STATE);
1302
1303 if (!mdi) {
1304 if (verbose)
1305 pr_err("Failed to read sysfs attributes for %s\n", devname);
1306 return false;
1307 }
1308
1309 char *avail = xcalloc(array->raid_disks, sizeof(char));
1310
1311 for (mdi = mdi->devs; mdi; mdi = mdi->next) {
1312 if (mdi->disk.raid_disk < 0)
1313 continue;
1314 if (!(mdi->disk.state & (1 << MD_DISK_SYNC)))
1315 continue;
1316 if (makedev(mdi->disk.major, mdi->disk.minor) == devid)
1317 continue;
1318 avail[mdi->disk.raid_disk] = 1;
1319 }
1320 sysfs_free(mdi);
1321
1322 bool is_enough = enough(array->level, array->raid_disks,
1323 array->layout, 1, avail);
1324
1325 free(avail);
1326 return is_enough;
1327 }
1328
1329 /**
1330 * Manage_subdevs() - Execute operation depending on devmode.
1331 *
1332 * @devname: name of the device.
1333 * @fd: file descriptor.
1334 * @devlist: list of sub-devices to manage.
1335 * @verbose: verbose level.
1336 * @test: test flag.
1337 * @update: type of update.
1338 * @force: force flag.
1339 *
1340 * This function executes operation defined by devmode
1341 * for each dev from devlist.
1342 * Devmode can be:
1343 * 'a' - add the device
1344 * 'S' - add the device as a spare - don't try re-add
1345 * 'j' - add the device as a journal device
1346 * 'A' - re-add the device
1347 * 'r' - remove the device: HOT_REMOVE_DISK
1348 * device can be 'faulty' or 'detached' in which case all
1349 * matching devices are removed.
1350 * 'f' - set the device faulty SET_DISK_FAULTY
1351 * device can be 'detached' in which case any device that
1352 * is inaccessible will be marked faulty.
1353 * 'I' - remove device by using incremental fail
1354 * which is executed when device is removed surprisingly.
1355 * 'R' - mark this device as wanting replacement.
1356 * 'W' - this device is added if necessary and activated as
1357 * a replacement for a previous 'R' device.
1358 * -----
1359 * 'w' - 'W' will be changed to 'w' when it is paired with
1360 * a 'R' device. If a 'W' is found while walking the list
1361 * it must be unpaired, and is an error.
1362 * 'M' - this is created by a 'missing' target. It is a slight
1363 * variant on 'A'
1364 * 'F' - Another variant of 'A', where the device was faulty
1365 * so must be removed from the array first.
1366 * 'c' - confirm the device as found (for clustered environments)
1367 *
1368 * For 'f' and 'r', the device can also be a kernel-internal
1369 * name such as 'sdb'.
1370 *
1371 * Return: 0 on success, otherwise 1 or 2.
1372 */
1373 int Manage_subdevs(char *devname, int fd,
1374 struct mddev_dev *devlist, int verbose, int test,
1375 enum update_opt update, int force)
1376 {
1377 mdu_array_info_t array;
1378 unsigned long long array_size;
1379 struct mddev_dev *dv;
1380 int tfd = -1;
1381 struct supertype *tst = NULL;
1382 char *subarray = NULL;
1383 int sysfd = -1;
1384 int count = 0; /* number of actions taken */
1385 struct mdinfo info;
1386 struct mdinfo devinfo;
1387 int frozen = 0;
1388 int busy = 0;
1389 int raid_slot = -1;
1390
1391 if (sysfs_init(&info, fd, NULL)) {
1392 pr_err("sysfs not availabile for %s\n", devname);
1393 goto abort;
1394 }
1395
1396 if (md_get_array_info(fd, &array)) {
1397 pr_err("Cannot get array info for %s\n", devname);
1398 goto abort;
1399 }
1400 /* array.size is only 32 bits and may be truncated.
1401 * So read from sysfs if possible, and record number of sectors
1402 */
1403
1404 array_size = get_component_size(fd);
1405 if (array_size <= 0)
1406 array_size = array.size * 2;
1407
1408 tst = super_by_fd(fd, &subarray);
1409 if (!tst) {
1410 pr_err("unsupport array - version %d.%d\n",
1411 array.major_version, array.minor_version);
1412 goto abort;
1413 }
1414
1415 for (dv = devlist; dv; dv = dv->next) {
1416 dev_t rdev = 0; /* device to add/remove etc */
1417 int rv;
1418 int mj,mn;
1419
1420 raid_slot = -1;
1421 if (dv->disposition == 'c') {
1422 rv = parse_cluster_confirm_arg(dv->devname,
1423 &dv->devname,
1424 &raid_slot);
1425 if (rv) {
1426 pr_err("Could not get the devname of cluster\n");
1427 goto abort;
1428 }
1429 }
1430
1431 if (strcmp(dv->devname, "failed") == 0 ||
1432 strcmp(dv->devname, "faulty") == 0) {
1433 if (dv->disposition != 'A' && dv->disposition != 'r') {
1434 pr_err("%s only meaningful with -r or --re-add, not -%c\n",
1435 dv->devname, dv->disposition);
1436 goto abort;
1437 }
1438 add_faulty(dv, fd, (dv->disposition == 'A'
1439 ? 'F' : 'r'));
1440 continue;
1441 }
1442 if (strcmp(dv->devname, "detached") == 0) {
1443 if (dv->disposition != 'r' && dv->disposition != 'f') {
1444 pr_err("%s only meaningful with -r of -f, not -%c\n",
1445 dv->devname, dv->disposition);
1446 goto abort;
1447 }
1448 add_detached(dv, fd, dv->disposition);
1449 continue;
1450 }
1451
1452 if (strcmp(dv->devname, "missing") == 0) {
1453 struct mddev_dev *add_devlist;
1454 struct mddev_dev **dp;
1455 if (dv->disposition == 'c') {
1456 rv = ioctl(fd, CLUSTERED_DISK_NACK, NULL);
1457 break;
1458 }
1459
1460 if (dv->disposition != 'A') {
1461 pr_err("'missing' only meaningful with --re-add\n");
1462 goto abort;
1463 }
1464 add_devlist = conf_get_devs();
1465 if (add_devlist == NULL) {
1466 pr_err("no devices to scan for missing members.\n");
1467 continue;
1468 }
1469 for (dp = &add_devlist; *dp; dp = & (*dp)->next)
1470 /* 'M' (for 'missing') is like 'A' without errors */
1471 (*dp)->disposition = 'M';
1472 *dp = dv->next;
1473 dv->next = add_devlist;
1474 continue;
1475 }
1476
1477 if (strncmp(dv->devname, "set-", 4) == 0 &&
1478 strlen(dv->devname) == 5) {
1479 int copies;
1480
1481 if (dv->disposition != 'r' &&
1482 dv->disposition != 'f') {
1483 pr_err("'%s' only meaningful with -r or -f\n",
1484 dv->devname);
1485 goto abort;
1486 }
1487 if (array.level != 10) {
1488 pr_err("'%s' only meaningful with RAID10 arrays\n",
1489 dv->devname);
1490 goto abort;
1491 }
1492 copies = ((array.layout & 0xff) *
1493 ((array.layout >> 8) & 0xff));
1494 if (array.raid_disks % copies != 0 ||
1495 dv->devname[4] < 'A' ||
1496 dv->devname[4] >= 'A' + copies ||
1497 copies > 26) {
1498 pr_err("'%s' not meaningful with this array\n",
1499 dv->devname);
1500 goto abort;
1501 }
1502 add_set(dv, fd, dv->devname[4]);
1503 continue;
1504 }
1505
1506 if (strchr(dv->devname, '/') == NULL &&
1507 strchr(dv->devname, ':') == NULL &&
1508 strlen(dv->devname) < 50) {
1509 /* Assume this is a kernel-internal name like 'sda1' */
1510 int found = 0;
1511 char dname[55];
1512 if (dv->disposition != 'r' && dv->disposition != 'f' &&
1513 dv->disposition != 'I') {
1514 pr_err("%s only meaningful with -r, -f or -I, not -%c\n",
1515 dv->devname, dv->disposition);
1516 goto abort;
1517 }
1518
1519 sprintf(dname, "dev-%s", dv->devname);
1520 sysfd = sysfs_open(fd2devnm(fd), dname, "block/dev");
1521 if (sysfd >= 0) {
1522 char dn[20];
1523 if (sysfs_fd_get_str(sysfd, dn, 20) > 0 &&
1524 sscanf(dn, "%d:%d", &mj,&mn) == 2) {
1525 rdev = makedev(mj,mn);
1526 found = 1;
1527 }
1528 close(sysfd);
1529 sysfd = -1;
1530 }
1531 if (!found) {
1532 sysfd = sysfs_open(fd2devnm(fd), dname, "state");
1533 if (sysfd < 0) {
1534 pr_err("%s does not appear to be a component of %s\n",
1535 dv->devname, devname);
1536 goto abort;
1537 }
1538 }
1539 } else if ((dv->disposition == 'r' ||
1540 dv->disposition == 'f') &&
1541 get_maj_min(dv->devname, &mj, &mn)) {
1542 /* for 'fail' and 'remove', the device might
1543 * not exist.
1544 */
1545 rdev = makedev(mj, mn);
1546 } else {
1547 tfd = dev_open(dv->devname, O_RDONLY);
1548 if (tfd >= 0) {
1549 fstat_is_blkdev(tfd, dv->devname, &rdev);
1550 close(tfd);
1551 } else {
1552 int open_err = errno;
1553 if (!stat_is_blkdev(dv->devname, &rdev)) {
1554 if (dv->disposition == 'M')
1555 /* non-fatal. Also improbable */
1556 continue;
1557 goto abort;
1558 }
1559 if (dv->disposition == 'r')
1560 /* Be happy, the stat worked, that is
1561 * enough for --remove
1562 */
1563 ;
1564 else {
1565 if (dv->disposition == 'M')
1566 /* non-fatal */
1567 continue;
1568 pr_err("Cannot open %s: %s\n",
1569 dv->devname, strerror(open_err));
1570 goto abort;
1571 }
1572 }
1573 }
1574 switch(dv->disposition){
1575 default:
1576 pr_err("internal error - devmode[%s]=%d\n",
1577 dv->devname, dv->disposition);
1578 goto abort;
1579 case 'a':
1580 case 'S': /* --add-spare */
1581 case 'j': /* --add-journal */
1582 case 'A':
1583 case 'M': /* --re-add missing */
1584 case 'F': /* --re-add faulty */
1585 case 'c': /* --cluster-confirm */
1586 /* add the device */
1587 if (subarray) {
1588 pr_err("Cannot add disks to a \'member\' array, perform this operation on the parent container\n");
1589 goto abort;
1590 }
1591
1592 /* Let's first try to write re-add to sysfs */
1593 if (rdev != 0 &&
1594 (dv->disposition == 'A' || dv->disposition == 'F')) {
1595 sysfs_init_dev(&devinfo, rdev);
1596 if (sysfs_set_str(&info, &devinfo, "state", "re-add") == 0) {
1597 pr_err("re-add %s to %s succeed\n",
1598 dv->devname, info.sys_name);
1599 break;
1600 }
1601 }
1602
1603 if (dv->disposition == 'F')
1604 /* Need to remove first */
1605 hot_remove_disk(fd, rdev, force);
1606 /* Make sure it isn't in use (in 2.6 or later) */
1607 tfd = dev_open(dv->devname, O_RDONLY|O_EXCL);
1608 if (tfd >= 0) {
1609 /* We know no-one else is using it. We'll
1610 * need non-exclusive access to add it, so
1611 * do that now.
1612 */
1613 close(tfd);
1614 tfd = dev_open(dv->devname, O_RDONLY);
1615 }
1616 if (tfd < 0) {
1617 if (dv->disposition == 'M')
1618 continue;
1619 pr_err("Cannot open %s: %s\n",
1620 dv->devname, strerror(errno));
1621 goto abort;
1622 }
1623 if (!frozen) {
1624 if (sysfs_freeze_array(&info) == 1)
1625 frozen = 1;
1626 else
1627 frozen = -1;
1628 }
1629 rv = Manage_add(fd, tfd, dv, tst, &array,
1630 force, verbose, devname, update,
1631 rdev, array_size, raid_slot);
1632 close(tfd);
1633 tfd = -1;
1634 if (rv < 0)
1635 goto abort;
1636 if (rv > 0)
1637 count++;
1638 break;
1639
1640 case 'r':
1641 /* hot remove */
1642 if (subarray) {
1643 pr_err("Cannot remove disks from a \'member\' array, perform this operation on the parent container\n");
1644 rv = -1;
1645 } else
1646 rv = Manage_remove(tst, fd, dv, sysfd,
1647 rdev, verbose, force,
1648 devname);
1649 if (sysfd >= 0)
1650 close(sysfd);
1651 sysfd = -1;
1652 if (rv < 0)
1653 goto abort;
1654 if (rv > 0)
1655 count++;
1656 break;
1657
1658 case 'f': /* set faulty */
1659 if (!is_remove_safe(&array, fd, dv->devname, verbose)) {
1660 pr_err("Cannot remove %s from %s, array will be failed.\n",
1661 dv->devname, devname);
1662 if (sysfd >= 0)
1663 close(sysfd);
1664 goto abort;
1665 }
1666 case 'I': /* incremental fail */
1667 if ((sysfd >= 0 && write(sysfd, "faulty", 6) != 6) ||
1668 (sysfd < 0 && ioctl(fd, SET_DISK_FAULTY,
1669 rdev))) {
1670 if (errno == EBUSY)
1671 busy = 1;
1672 pr_err("set device faulty failed for %s: %s\n",
1673 dv->devname, strerror(errno));
1674 if (sysfd >= 0)
1675 close(sysfd);
1676 goto abort;
1677 }
1678 if (sysfd >= 0)
1679 close(sysfd);
1680 sysfd = -1;
1681 count++;
1682 if (verbose >= 0)
1683 pr_err("set %s faulty in %s\n",
1684 dv->devname, devname);
1685 break;
1686 case 'R': /* Mark as replaceable */
1687 if (subarray) {
1688 pr_err("Cannot replace disks in a \'member\' array, perform this operation on the parent container\n");
1689 rv = -1;
1690 } else {
1691 if (!frozen) {
1692 if (sysfs_freeze_array(&info) == 1)
1693 frozen = 1;
1694 else
1695 frozen = -1;
1696 }
1697 rv = Manage_replace(tst, fd, dv,
1698 rdev, verbose,
1699 devname);
1700 }
1701 if (rv < 0)
1702 goto abort;
1703 if (rv > 0)
1704 count++;
1705 break;
1706 case 'W': /* --with device that doesn't match */
1707 pr_err("No matching --replace device for --with %s\n",
1708 dv->devname);
1709 goto abort;
1710 case 'w': /* --with device which was matched */
1711 rv = Manage_with(tst, fd, dv,
1712 rdev, verbose, devname);
1713 if (rv < 0)
1714 goto abort;
1715 break;
1716 }
1717 }
1718 free(tst);
1719 if (frozen > 0)
1720 sysfs_set_str(&info, NULL, "sync_action","idle");
1721 if (test && count == 0)
1722 return 2;
1723 return 0;
1724
1725 abort:
1726 free(tst);
1727 if (frozen > 0)
1728 sysfs_set_str(&info, NULL, "sync_action","idle");
1729 return !test && busy ? 2 : 1;
1730 }
1731
1732 int autodetect(void)
1733 {
1734 /* Open any md device, and issue the RAID_AUTORUN ioctl */
1735 int rv = 1;
1736 int fd = dev_open("9:0", O_RDONLY);
1737 if (fd >= 0) {
1738 if (ioctl(fd, RAID_AUTORUN, 0) == 0)
1739 rv = 0;
1740 close(fd);
1741 }
1742 return rv;
1743 }
1744
1745 int Update_subarray(char *dev, char *subarray, enum update_opt update,
1746 struct mddev_ident *ident, int verbose)
1747 {
1748 struct supertype supertype, *st = &supertype;
1749 int fd, rv = 2;
1750 struct mdinfo *info = NULL;
1751 char *update_verb = map_num(update_options, update);
1752 bool allow_active = update == UOPT_PPL || update == UOPT_NO_PPL;
1753
1754 memset(st, 0, sizeof(*st));
1755
1756 fd = open_subarray(dev, subarray, st, verbose < 0);
1757 if (fd < 0)
1758 return 2;
1759
1760 if (!st->ss->update_subarray) {
1761 if (verbose >= 0)
1762 pr_err("Operation not supported for %s metadata\n",
1763 st->ss->name);
1764 goto free_super;
1765 }
1766
1767 if (!allow_active && is_subarray_active(subarray, st->devnm)) {
1768 if (verbose >= 0)
1769 pr_err("Subarray %s in %s is active, cannot update %s\n",
1770 subarray, dev, update_verb);
1771 goto free_super;
1772 }
1773
1774 if (mdmon_running(st->devnm))
1775 st->update_tail = &st->updates;
1776
1777 info = st->ss->container_content(st, subarray);
1778
1779 if (update == UOPT_PPL && !is_level456(info->array.level)) {
1780 pr_err("RWH policy ppl is supported only for raid4, raid5 and raid6.\n");
1781 goto free_super;
1782 }
1783
1784 rv = st->ss->update_subarray(st, subarray, update, ident);
1785
1786 if (rv) {
1787 if (verbose >= 0)
1788 pr_err("Failed to update %s of subarray-%s in %s\n",
1789 update_verb, subarray, dev);
1790 } else if (st->update_tail)
1791 flush_metadata_updates(st);
1792 else
1793 st->ss->sync_metadata(st);
1794
1795 if (rv == 0 && update == UOPT_NAME && verbose >= 0)
1796 pr_err("Updated subarray-%s name from %s, UUIDs may have changed\n",
1797 subarray, dev);
1798
1799 free_super:
1800 if (info)
1801 free(info);
1802 st->ss->free_super(st);
1803 close(fd);
1804
1805 return rv;
1806 }
1807
1808 /* Move spare from one array to another If adding to destination array fails
1809 * add back to original array.
1810 * Returns 1 on success, 0 on failure */
1811 int move_spare(char *from_devname, char *to_devname, dev_t devid)
1812 {
1813 struct mddev_dev devlist;
1814 char devname[20];
1815
1816 /* try to remove and add */
1817 int fd1 = open(to_devname, O_RDONLY);
1818 int fd2 = open(from_devname, O_RDONLY);
1819
1820 if (fd1 < 0 || fd2 < 0) {
1821 if (fd1 >= 0)
1822 close(fd1);
1823 if (fd2 >= 0)
1824 close(fd2);
1825 return 0;
1826 }
1827
1828 devlist.next = NULL;
1829 devlist.used = 0;
1830 devlist.writemostly = FlagDefault;
1831 devlist.failfast = FlagDefault;
1832 devlist.devname = devname;
1833 sprintf(devname, "%d:%d", major(devid), minor(devid));
1834
1835 devlist.disposition = 'r';
1836 if (Manage_subdevs(from_devname, fd2, &devlist, -1, 0, UOPT_UNDEFINED, 0) == 0) {
1837 devlist.disposition = 'a';
1838 if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0,
1839 UOPT_UNDEFINED, 0) == 0) {
1840 /* make sure manager is aware of changes */
1841 ping_manager(to_devname);
1842 ping_manager(from_devname);
1843 close(fd1);
1844 close(fd2);
1845 return 1;
1846 }
1847 else
1848 Manage_subdevs(from_devname, fd2, &devlist,
1849 -1, 0, UOPT_UNDEFINED, 0);
1850 }
1851 close(fd1);
1852 close(fd2);
1853 return 0;
1854 }