Stop managed arrays more carefully.
[thirdparty/mdadm.git] / Manage.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2001-2006 Neil Brown <neilb@suse.de>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@cse.unsw.edu.au>
23 * Paper: Neil Brown
24 * School of Computer Science and Engineering
25 * The University of New South Wales
26 * Sydney, 2052
27 * Australia
28 */
29
30 #include "mdadm.h"
31 #include "md_u.h"
32 #include "md_p.h"
33
/* ioctl request numbers built with _IO() on MD_MAJOR.
 * In the code visible here only STOP_MD is issued: Manage_runstop() uses it
 * to stop an array when md_get_version() reports a driver older than 0.90.0.
 * REGISTER_DEV and START_MD are not referenced in this part of the file;
 * presumably they are the matching requests for such old drivers - confirm.
 */
#define REGISTER_DEV _IO (MD_MAJOR, 1)
#define START_MD _IO (MD_MAJOR, 2)
#define STOP_MD _IO (MD_MAJOR, 3)
37
38 int Manage_ro(char *devname, int fd, int readonly)
39 {
40 /* switch to readonly or rw
41 *
42 * requires >= 0.90.0
43 * first check that array is runing
44 * use RESTART_ARRAY_RW or STOP_ARRAY_RO
45 *
46 */
47 mdu_array_info_t array;
48
49 if (md_get_version(fd) < 9000) {
50 fprintf(stderr, Name ": need md driver version 0.90.0 or later\n");
51 return 1;
52 }
53 if (ioctl(fd, GET_ARRAY_INFO, &array)) {
54 fprintf(stderr, Name ": %s does not appear to be active.\n",
55 devname);
56 return 1;
57 }
58
59 if (readonly>0) {
60 if (ioctl(fd, STOP_ARRAY_RO, NULL)) {
61 fprintf(stderr, Name ": failed to set readonly for %s: %s\n",
62 devname, strerror(errno));
63 return 1;
64 }
65 } else if (readonly < 0) {
66 if (ioctl(fd, RESTART_ARRAY_RW, NULL)) {
67 fprintf(stderr, Name ": failed to set writable for %s: %s\n",
68 devname, strerror(errno));
69 return 1;
70 }
71 }
72 return 0;
73 }
74
75 #ifndef MDASSEMBLE
76
77 int Manage_runstop(char *devname, int fd, int runstop, int quiet)
78 {
79 /* Run or stop the array. array must already be configured
80 * required >= 0.90.0
81 */
82 mdu_param_t param; /* unused */
83
84 if (runstop == -1 && md_get_version(fd) < 9000) {
85 if (ioctl(fd, STOP_MD, 0)) {
86 if (!quiet) fprintf(stderr, Name ": stopping device %s failed: %s\n",
87 devname, strerror(errno));
88 return 1;
89 }
90 }
91
92 if (md_get_version(fd) < 9000) {
93 fprintf(stderr, Name ": need md driver version 0.90.0 or later\n");
94 return 1;
95 }
96 /*
97 if (ioctl(fd, GET_ARRAY_INFO, &array)) {
98 fprintf(stderr, Name ": %s does not appear to be active.\n",
99 devname);
100 return 1;
101 }
102 */
103 if (runstop>0) {
104 if (ioctl(fd, RUN_ARRAY, &param)) {
105 fprintf(stderr, Name ": failed to run array %s: %s\n",
106 devname, strerror(errno));
107 return 1;
108 }
109 if (quiet <= 0)
110 fprintf(stderr, Name ": started %s\n", devname);
111 } else if (runstop < 0){
112 struct map_ent *map = NULL;
113 struct stat stb;
114 struct mdinfo *mdi;
115 /* If this is an mdmon managed array, just write 'inactive'
116 * to the array state and let mdmon clear up.
117 */
118 mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION);
119 if (mdi &&
120 mdi->array.level > 0 &&
121 mdi->text_version[0] == '/') {
122 char *cp;
123
124 /* This is mdmon managed. */
125 close(fd);
126 if (sysfs_set_str(mdi, NULL,
127 "array_state", "inactive") < 0) {
128 if (quiet==0)
129 fprintf(stderr, Name
130 ": fail to stop array %s: %s\n",
131 devname, strerror(errno));
132 return 1;
133 }
134
135 /* Give monitor a chance to act */
136 cp = strchr(mdi->text_version+1, '/');
137 if (*cp)
138 *cp = 0;
139 ping_monitor(mdi->text_version+1);
140
141 fd = open(devname, O_RDONLY);
142 }
143 if (mdi)
144 sysfs_free(mdi);
145
146 if (fd >= 0 && ioctl(fd, STOP_ARRAY, NULL)) {
147 if (quiet==0)
148 fprintf(stderr, Name
149 ": fail to stop array %s: %s\n",
150 devname, strerror(errno));
151 return 1;
152 }
153
154 if (quiet <= 0)
155 fprintf(stderr, Name ": stopped %s\n", devname);
156 if (fd >= 0 && fstat(fd, &stb) == 0) {
157 int devnum;
158 if (major(stb.st_rdev) == MD_MAJOR)
159 devnum = minor(stb.st_rdev);
160 else
161 devnum = -1-(minor(stb.st_rdev)>>6);
162 map_delete(&map, devnum);
163 map_write(map);
164 map_free(map);
165 }
166 }
167 return 0;
168 }
169
170 int Manage_resize(char *devname, int fd, long long size, int raid_disks)
171 {
172 mdu_array_info_t info;
173 if (ioctl(fd, GET_ARRAY_INFO, &info) != 0) {
174 fprintf(stderr, Name ": Cannot get array information for %s: %s\n",
175 devname, strerror(errno));
176 return 1;
177 }
178 if (size >= 0)
179 info.size = size;
180 if (raid_disks > 0)
181 info.raid_disks = raid_disks;
182 if (ioctl(fd, SET_ARRAY_INFO, &info) != 0) {
183 fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
184 devname, strerror(errno));
185 return 1;
186 }
187 return 0;
188 }
189
190 int Manage_reconfig(char *devname, int fd, int layout)
191 {
192 mdu_array_info_t info;
193 if (ioctl(fd, GET_ARRAY_INFO, &info) != 0) {
194 fprintf(stderr, Name ": Cannot get array information for %s: %s\n",
195 devname, strerror(errno));
196 return 1;
197 }
198 info.layout = layout;
199 printf("layout set to %d\n", info.layout);
200 if (ioctl(fd, SET_ARRAY_INFO, &info) != 0) {
201 fprintf(stderr, Name ": Cannot set layout for %s: %s\n",
202 devname, strerror(errno));
203 return 1;
204 }
205 return 0;
206 }
207
208 int Manage_subdevs(char *devname, int fd,
209 mddev_dev_t devlist, int verbose)
210 {
211 /* do something to each dev.
212 * devmode can be
213 * 'a' - add the device
214 * try HOT_ADD_DISK
215 * If that fails EINVAL, try ADD_NEW_DISK
216 * 'r' - remove the device HOT_REMOVE_DISK
217 * device can be 'faulty' or 'detached' in which case all
218 * matching devices are removed.
219 * 'f' - set the device faulty SET_DISK_FAULTY
220 * device can be 'detached' in which case any device that
221 * is inaccessible will be marked faulty.
222 */
223 mdu_array_info_t array;
224 mdu_disk_info_t disc;
225 unsigned long long array_size;
226 mddev_dev_t dv, next = NULL;
227 struct stat stb;
228 int j, jnext = 0;
229 int tfd;
230 struct supertype *st, *tst;
231 int duuid[4];
232 int ouuid[4];
233 int lfd = -1;
234
235 if (ioctl(fd, GET_ARRAY_INFO, &array)) {
236 fprintf(stderr, Name ": cannot get array info for %s\n",
237 devname);
238 return 1;
239 }
240
241 /* array.size is only 32 bit and may be truncated.
242 * So read from sysfs if possible, and record number of sectors
243 */
244
245 array_size = get_component_size(fd);
246 if (array_size <= 0)
247 array_size = array.size * 2;
248
249 tst = super_by_fd(fd);
250 if (!tst) {
251 fprintf(stderr, Name ": unsupport array - version %d.%d\n",
252 array.major_version, array.minor_version);
253 return 1;
254 }
255
256 for (dv = devlist, j=0 ; dv; dv = next, j = jnext) {
257 unsigned long long ldsize;
258 char dvname[20];
259 char *dnprintable = dv->devname;
260 int err;
261
262 next = dv->next;
263 jnext = 0;
264
265 if (strcmp(dv->devname, "failed")==0 ||
266 strcmp(dv->devname, "faulty")==0) {
267 if (dv->disposition != 'r') {
268 fprintf(stderr, Name ": %s only meaningful "
269 "with -r, not -%c\n",
270 dv->devname, dv->disposition);
271 return 1;
272 }
273 for (; j < array.raid_disks + array.nr_disks ; j++) {
274 disc.number = j;
275 if (ioctl(fd, GET_DISK_INFO, &disc))
276 continue;
277 if (disc.major == 0 && disc.minor == 0)
278 continue;
279 if ((disc.state & 1) == 0) /* faulty */
280 continue;
281 stb.st_rdev = makedev(disc.major, disc.minor);
282 next = dv;
283 jnext = j+1;
284 sprintf(dvname,"%d:%d", disc.major, disc.minor);
285 dnprintable = dvname;
286 break;
287 }
288 if (jnext == 0)
289 continue;
290 } else if (strcmp(dv->devname, "detached") == 0) {
291 if (dv->disposition != 'r' && dv->disposition != 'f') {
292 fprintf(stderr, Name ": %s only meaningful "
293 "with -r of -f, not -%c\n",
294 dv->devname, dv->disposition);
295 return 1;
296 }
297 for (; j < array.raid_disks + array.nr_disks; j++) {
298 int sfd;
299 disc.number = j;
300 if (ioctl(fd, GET_DISK_INFO, &disc))
301 continue;
302 if (disc.major == 0 && disc.minor == 0)
303 continue;
304 sprintf(dvname,"%d:%d", disc.major, disc.minor);
305 sfd = dev_open(dvname, O_RDONLY);
306 if (sfd >= 0) {
307 close(sfd);
308 continue;
309 }
310 if (dv->disposition == 'f' &&
311 (disc.state & 1) == 1) /* already faulty */
312 continue;
313 if (errno != ENXIO)
314 continue;
315 stb.st_rdev = makedev(disc.major, disc.minor);
316 next = dv;
317 jnext = j+1;
318 dnprintable = dvname;
319 break;
320 }
321 if (jnext == 0)
322 continue;
323 } else {
324 j = 0;
325
326 if (stat(dv->devname, &stb)) {
327 fprintf(stderr, Name ": cannot find %s: %s\n",
328 dv->devname, strerror(errno));
329 return 1;
330 }
331 if ((stb.st_mode & S_IFMT) != S_IFBLK) {
332 fprintf(stderr, Name ": %s is not a "
333 "block device.\n",
334 dv->devname);
335 return 1;
336 }
337 }
338 switch(dv->disposition){
339 default:
340 fprintf(stderr, Name ": internal error - devmode[%s]=%d\n",
341 dv->devname, dv->disposition);
342 return 1;
343 case 'a':
344 /* add the device */
345 if (tst->subarray[0]) {
346 fprintf(stderr, Name ": Cannot add disks to a"
347 " \'member\' array, perform this"
348 " operation on the parent container\n");
349 return 1;
350 }
351 /* Make sure it isn't in use (in 2.6 or later) */
352 tfd = open(dv->devname, O_RDONLY|O_EXCL|O_DIRECT);
353 if (tfd < 0) {
354 fprintf(stderr, Name ": Cannot open %s: %s\n",
355 dv->devname, strerror(errno));
356 return 1;
357 }
358 remove_partitions(tfd);
359
360 st = dup_super(tst);
361
362 if (array.not_persistent==0)
363 st->ss->load_super(st, tfd, NULL);
364
365 if (!get_dev_size(tfd, dv->devname, &ldsize)) {
366 close(tfd);
367 return 1;
368 }
369 close(tfd);
370
371
372 if (!tst->ss->external &&
373 array.major_version == 0 &&
374 md_get_version(fd)%100 < 2) {
375 if (ioctl(fd, HOT_ADD_DISK,
376 (unsigned long)stb.st_rdev)==0) {
377 if (verbose >= 0)
378 fprintf(stderr, Name ": hot added %s\n",
379 dv->devname);
380 continue;
381 }
382
383 fprintf(stderr, Name ": hot add failed for %s: %s\n",
384 dv->devname, strerror(errno));
385 return 1;
386 }
387
388 if (array.not_persistent == 0) {
389
390 /* Make sure device is large enough */
391 if (tst->ss->avail_size(tst, ldsize/512) <
392 array_size) {
393 fprintf(stderr, Name ": %s not large enough to join array\n",
394 dv->devname);
395 return 1;
396 }
397
398 /* need to find a sample superblock to copy, and
399 * a spare slot to use
400 */
401 for (j = 0; j < tst->max_devs; j++) {
402 char *dev;
403 int dfd;
404 disc.number = j;
405 if (ioctl(fd, GET_DISK_INFO, &disc))
406 continue;
407 if (disc.major==0 && disc.minor==0)
408 continue;
409 if ((disc.state & 4)==0) continue; /* sync */
410 /* Looks like a good device to try */
411 dev = map_dev(disc.major, disc.minor, 1);
412 if (!dev) continue;
413 dfd = dev_open(dev, O_RDONLY);
414 if (dfd < 0) continue;
415 if (tst->ss->load_super(tst, dfd,
416 NULL)) {
417 close(dfd);
418 continue;
419 }
420 close(dfd);
421 break;
422 }
423 if (!tst->sb) {
424 fprintf(stderr, Name ": cannot find valid superblock in this array - HELP\n");
425 return 1;
426 }
427 /* Possibly this device was recently part of the array
428 * and was temporarily removed, and is now being re-added.
429 * If so, we can simply re-add it.
430 */
431 tst->ss->uuid_from_super(tst, duuid);
432
433 /* re-add doesn't work for version-1 superblocks
434 * before 2.6.18 :-(
435 */
436 if (array.major_version == 1 &&
437 get_linux_version() <= 2006018)
438 ;
439 else if (st->sb) {
440 st->ss->uuid_from_super(st, ouuid);
441 if (memcmp(duuid, ouuid, sizeof(ouuid))==0) {
442 /* looks close enough for now. Kernel
443 * will worry about whether a bitmap
444 * based reconstruction is possible.
445 */
446 struct mdinfo mdi;
447 st->ss->getinfo_super(st, &mdi);
448 disc.major = major(stb.st_rdev);
449 disc.minor = minor(stb.st_rdev);
450 disc.number = mdi.disk.number;
451 disc.raid_disk = mdi.disk.raid_disk;
452 disc.state = mdi.disk.state;
453 if (dv->writemostly)
454 disc.state |= 1 << MD_DISK_WRITEMOSTLY;
455 if (ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
456 if (verbose >= 0)
457 fprintf(stderr, Name ": re-added %s\n", dv->devname);
458 continue;
459 }
460 /* fall back on normal-add */
461 }
462 }
463 } else {
464 /* non-persistent. Must ensure that new drive
465 * is at least array.size big.
466 */
467 if (ldsize/512 < array_size) {
468 fprintf(stderr, Name ": %s not large enough to join array\n",
469 dv->devname);
470 return 1;
471 }
472 }
473 /* in 2.6.17 and earlier, version-1 superblocks won't
474 * use the number we write, but will choose a free number.
475 * we must choose the same free number, which requires
476 * starting at 'raid_disks' and counting up
477 */
478 for (j = array.raid_disks; j< tst->max_devs; j++) {
479 disc.number = j;
480 if (ioctl(fd, GET_DISK_INFO, &disc))
481 break;
482 if (disc.major==0 && disc.minor==0)
483 break;
484 if (disc.state & 8) /* removed */
485 break;
486 }
487 disc.major = major(stb.st_rdev);
488 disc.minor = minor(stb.st_rdev);
489 disc.number =j;
490 disc.state = 0;
491 if (array.not_persistent==0) {
492 int dfd;
493 if (dv->writemostly)
494 disc.state |= 1 << MD_DISK_WRITEMOSTLY;
495 dfd = open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
496 tst->ss->add_to_super(tst, &disc, dfd,
497 dv->devname);
498 /* write_init_super will close 'dfd' */
499 if (tst->ss->write_init_super(tst))
500 return 1;
501 } else if (dv->re_add) {
502 /* this had better be raid1.
503 * As we are "--re-add"ing we must find a spare slot
504 * to fill.
505 */
506 char *used = malloc(array.raid_disks);
507 memset(used, 0, array.raid_disks);
508 for (j=0; j< tst->max_devs; j++) {
509 mdu_disk_info_t disc2;
510 disc2.number = j;
511 if (ioctl(fd, GET_DISK_INFO, &disc2))
512 continue;
513 if (disc2.major==0 && disc2.minor==0)
514 continue;
515 if (disc2.state & 8) /* removed */
516 continue;
517 if (disc2.raid_disk < 0)
518 continue;
519 if (disc2.raid_disk > array.raid_disks)
520 continue;
521 used[disc2.raid_disk] = 1;
522 }
523 for (j=0 ; j<array.raid_disks; j++)
524 if (!used[j]) {
525 disc.raid_disk = j;
526 disc.state |= (1<<MD_DISK_SYNC);
527 break;
528 }
529 }
530 if (dv->writemostly)
531 disc.state |= (1 << MD_DISK_WRITEMOSTLY);
532 if (ioctl(fd,ADD_NEW_DISK, &disc)) {
533 fprintf(stderr, Name ": add new device failed for %s as %d: %s\n",
534 dv->devname, j, strerror(errno));
535 return 1;
536 }
537 if (verbose >= 0)
538 fprintf(stderr, Name ": added %s\n", dv->devname);
539 break;
540
541 case 'r':
542 /* hot remove */
543 if (tst->subarray[0]) {
544 fprintf(stderr, Name ": Cannot remove disks from a"
545 " \'member\' array, perform this"
546 " operation on the parent container\n");
547 return 1;
548 }
549 if (tst->ss->external) {
550 /* To remove a device from a container, we must
551 * check that it isn't in use in an array.
552 * This involves looking in the 'holders'
553 * directory - there must be just one entry,
554 * the container.
555 * To ensure that it doesn't get used as a
556 * hold spare while we are checking, we
557 * get an O_EXCL open on the container
558 */
559 int dnum = fd2devnum(fd);
560 lfd = open_dev_excl(dnum);
561 if (lfd < 0) {
562 fprintf(stderr, Name
563 ": Cannot get exclusive access "
564 " to container - odd\n");
565 return 1;
566 }
567 if (!sysfs_unique_holder(dnum, stb.st_rdev)) {
568 fprintf(stderr, Name
569 ": %s is %s, cannot remove.\n",
570 dnprintable,
571 errno == EEXIST ? "still in use":
572 "not a member");
573 close(lfd);
574 return 1;
575 }
576 }
577 /* FIXME check that it is a current member */
578 err = ioctl(fd, HOT_REMOVE_DISK, (unsigned long)stb.st_rdev);
579 if (err && errno == ENODEV) {
580 /* Old kernels rejected this if no personality
581 * registered */
582 struct mdinfo *sra = sysfs_read(fd, 0, GET_DEVS);
583 struct mdinfo *dv = NULL;
584 if (sra)
585 dv = sra->devs;
586 for ( ; dv ; dv=dv->next)
587 if (dv->disk.major == major(stb.st_rdev) &&
588 dv->disk.minor == minor(stb.st_rdev))
589 break;
590 if (dv)
591 err = sysfs_set_str(sra, dv,
592 "state", "remove");
593 else
594 err = -1;
595 if (sra)
596 sysfs_free(sra);
597 }
598 if (err) {
599 fprintf(stderr, Name ": hot remove failed "
600 "for %s: %s\n", dnprintable,
601 strerror(errno));
602 if (lfd >= 0)
603 close(lfd);
604 return 1;
605 }
606 close(lfd);
607 if (verbose >= 0)
608 fprintf(stderr, Name ": hot removed %s\n",
609 dnprintable);
610 break;
611
612 case 'f': /* set faulty */
613 /* FIXME check current member */
614 if (ioctl(fd, SET_DISK_FAULTY, (unsigned long) stb.st_rdev)) {
615 fprintf(stderr, Name ": set device faulty failed for %s: %s\n",
616 dnprintable, strerror(errno));
617 return 1;
618 }
619 if (verbose >= 0)
620 fprintf(stderr, Name ": set %s faulty in %s\n",
621 dnprintable, devname);
622 break;
623 }
624 }
625 return 0;
626
627 }
628
629 int autodetect(void)
630 {
631 /* Open any md device, and issue the RAID_AUTORUN ioctl */
632 int rv = 1;
633 int fd = dev_open("9:0", O_RDONLY);
634 if (fd >= 0) {
635 if (ioctl(fd, RAID_AUTORUN, 0) == 0)
636 rv = 0;
637 close(fd);
638 }
639 return rv;
640 }
641 #endif