Stop managed arrays more carefully.
[thirdparty/mdadm.git] / Manage.c
1 /*
2  * mdadm - manage Linux "md" devices aka RAID arrays.
3  *
4  * Copyright (C) 2001-2006 Neil Brown <neilb@suse.de>
5  *
6  *
7  *    This program is free software; you can redistribute it and/or modify
8  *    it under the terms of the GNU General Public License as published by
9  *    the Free Software Foundation; either version 2 of the License, or
10  *    (at your option) any later version.
11  *
12  *    This program is distributed in the hope that it will be useful,
13  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *    GNU General Public License for more details.
16  *
17  *    You should have received a copy of the GNU General Public License
18  *    along with this program; if not, write to the Free Software
19  *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20  *
21  *    Author: Neil Brown
22  *    Email: <neilb@cse.unsw.edu.au>
23  *    Paper: Neil Brown
24  *           School of Computer Science and Engineering
25  *           The University of New South Wales
26  *           Sydney, 2052
27  *           Australia
28  */
29
30 #include "mdadm.h"
31 #include "md_u.h"
32 #include "md_p.h"
33
/* Legacy md ioctl request numbers.  In the code visible here only STOP_MD
 * is issued, and only when md_get_version() reports a driver older than
 * 0.90.0; REGISTER_DEV and START_MD are not referenced in this part of
 * the file.
 */
#define REGISTER_DEV	_IO(MD_MAJOR, 1)
#define START_MD	_IO(MD_MAJOR, 2)
#define STOP_MD		_IO(MD_MAJOR, 3)
37
38 int Manage_ro(char *devname, int fd, int readonly)
39 {
40         /* switch to readonly or rw
41          *
42          * requires >= 0.90.0
43          * first check that array is runing
44          * use RESTART_ARRAY_RW or STOP_ARRAY_RO
45          *
46          */
47         mdu_array_info_t array;
48
49         if (md_get_version(fd) < 9000) {
50                 fprintf(stderr, Name ": need md driver version 0.90.0 or later\n");
51                 return 1;
52         }
53         if (ioctl(fd, GET_ARRAY_INFO, &array)) {
54                 fprintf(stderr, Name ": %s does not appear to be active.\n",
55                         devname);
56                 return 1;
57         }
58
59         if (readonly>0) {
60                 if (ioctl(fd, STOP_ARRAY_RO, NULL)) {
61                         fprintf(stderr, Name ": failed to set readonly for %s: %s\n",
62                                 devname, strerror(errno));
63                         return 1;
64                 }
65         } else if (readonly < 0) {
66                 if (ioctl(fd, RESTART_ARRAY_RW, NULL)) {
67                         fprintf(stderr, Name ": failed to set writable for %s: %s\n",
68                                 devname, strerror(errno));
69                         return 1;
70                 }
71         }
72         return 0;
73 }
74
75 #ifndef MDASSEMBLE
76
77 int Manage_runstop(char *devname, int fd, int runstop, int quiet)
78 {
79         /* Run or stop the array. array must already be configured
80          * required >= 0.90.0
81          */
82         mdu_param_t param; /* unused */
83
84         if (runstop == -1 && md_get_version(fd) < 9000) {
85                 if (ioctl(fd, STOP_MD, 0)) {
86                         if (!quiet) fprintf(stderr, Name ": stopping device %s failed: %s\n",
87                                             devname, strerror(errno));
88                         return 1;
89                 }
90         }
91
92         if (md_get_version(fd) < 9000) {
93                 fprintf(stderr, Name ": need md driver version 0.90.0 or later\n");
94                 return 1;
95         }
96         /*
97         if (ioctl(fd, GET_ARRAY_INFO, &array)) {
98                 fprintf(stderr, Name ": %s does not appear to be active.\n",
99                         devname);
100                 return 1;
101         }
102         */
103         if (runstop>0) {
104                 if (ioctl(fd, RUN_ARRAY, &param)) {
105                         fprintf(stderr, Name ": failed to run array %s: %s\n",
106                                 devname, strerror(errno));
107                         return 1;
108                 }
109                 if (quiet <= 0)
110                         fprintf(stderr, Name ": started %s\n", devname);
111         } else if (runstop < 0){
112                 struct map_ent *map = NULL;
113                 struct stat stb;
114                 struct mdinfo *mdi;
115                 /* If this is an mdmon managed array, just write 'inactive'
116                  * to the array state and let mdmon clear up.
117                  */
118                 mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION);
119                 if (mdi &&
120                     mdi->array.level > 0 &&
121                     mdi->text_version[0] == '/') {
122                         char *cp;
123
124                         /* This is mdmon managed. */
125                         close(fd);
126                         if (sysfs_set_str(mdi, NULL,
127                                           "array_state", "inactive") < 0) {
128                                 if (quiet==0)
129                                         fprintf(stderr, Name
130                                                 ": fail to stop array %s: %s\n",
131                                                 devname, strerror(errno));
132                                 return 1;
133                         }
134
135                         /* Give monitor a chance to act */
136                         cp = strchr(mdi->text_version+1, '/');
137                         if (*cp)
138                                 *cp = 0;
139                         ping_monitor(mdi->text_version+1);
140
141                         fd = open(devname, O_RDONLY);
142                 }
143                 if (mdi)
144                         sysfs_free(mdi);
145
146                 if (fd >= 0 && ioctl(fd, STOP_ARRAY, NULL)) {
147                         if (quiet==0)
148                                 fprintf(stderr, Name
149                                         ": fail to stop array %s: %s\n",
150                                         devname, strerror(errno));
151                         return 1;
152                 }
153
154                 if (quiet <= 0)
155                         fprintf(stderr, Name ": stopped %s\n", devname);
156                 if (fd >= 0 && fstat(fd, &stb) == 0) {
157                         int devnum;
158                         if (major(stb.st_rdev) == MD_MAJOR)
159                                 devnum = minor(stb.st_rdev);
160                         else
161                                 devnum = -1-(minor(stb.st_rdev)>>6);
162                         map_delete(&map, devnum);
163                         map_write(map);
164                         map_free(map);
165                 }
166         }
167         return 0;
168 }
169
170 int Manage_resize(char *devname, int fd, long long size, int raid_disks)
171 {
172         mdu_array_info_t info;
173         if (ioctl(fd, GET_ARRAY_INFO, &info) != 0) {
174                 fprintf(stderr, Name ": Cannot get array information for %s: %s\n",
175                         devname, strerror(errno));
176                 return 1;
177         }
178         if (size >= 0)
179                 info.size = size;
180         if (raid_disks > 0)
181                 info.raid_disks = raid_disks;
182         if (ioctl(fd, SET_ARRAY_INFO, &info) != 0) {
183                 fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
184                         devname, strerror(errno));
185                 return 1;
186         }
187         return 0;
188 }
189
190 int Manage_reconfig(char *devname, int fd, int layout)
191 {
192         mdu_array_info_t info;
193         if (ioctl(fd, GET_ARRAY_INFO, &info) != 0) {
194                 fprintf(stderr, Name ": Cannot get array information for %s: %s\n",
195                         devname, strerror(errno));
196                 return 1;
197         }
198         info.layout = layout;
199         printf("layout set to %d\n", info.layout);
200         if (ioctl(fd, SET_ARRAY_INFO, &info) != 0) {
201                 fprintf(stderr, Name ": Cannot set layout for %s: %s\n",
202                         devname, strerror(errno));
203                 return 1;
204         }
205         return 0;
206 }
207
208 int Manage_subdevs(char *devname, int fd,
209                    mddev_dev_t devlist, int verbose)
210 {
211         /* do something to each dev.
212          * devmode can be
213          *  'a' - add the device
214          *         try HOT_ADD_DISK
215          *         If that fails EINVAL, try ADD_NEW_DISK
216          *  'r' - remove the device HOT_REMOVE_DISK
217          *        device can be 'faulty' or 'detached' in which case all
218          *        matching devices are removed.
219          *  'f' - set the device faulty SET_DISK_FAULTY
220          *        device can be 'detached' in which case any device that
221          *        is inaccessible will be marked faulty.
222          */
223         mdu_array_info_t array;
224         mdu_disk_info_t disc;
225         unsigned long long array_size;
226         mddev_dev_t dv, next = NULL;
227         struct stat stb;
228         int j, jnext = 0;
229         int tfd;
230         struct supertype *st, *tst;
231         int duuid[4];
232         int ouuid[4];
233         int lfd = -1;
234
235         if (ioctl(fd, GET_ARRAY_INFO, &array)) {
236                 fprintf(stderr, Name ": cannot get array info for %s\n",
237                         devname);
238                 return 1;
239         }
240
241         /* array.size is only 32 bit and may be truncated.
242          * So read from sysfs if possible, and record number of sectors
243          */
244
245         array_size = get_component_size(fd);
246         if (array_size <= 0)
247                 array_size = array.size * 2;
248
249         tst = super_by_fd(fd);
250         if (!tst) {
251                 fprintf(stderr, Name ": unsupport array - version %d.%d\n",
252                         array.major_version, array.minor_version);
253                 return 1;
254         }
255
256         for (dv = devlist, j=0 ; dv; dv = next, j = jnext) {
257                 unsigned long long ldsize;
258                 char dvname[20];
259                 char *dnprintable = dv->devname;
260                 int err;
261
262                 next = dv->next;
263                 jnext = 0;
264
265                 if (strcmp(dv->devname, "failed")==0 ||
266                     strcmp(dv->devname, "faulty")==0) {
267                         if (dv->disposition != 'r') {
268                                 fprintf(stderr, Name ": %s only meaningful "
269                                         "with -r, not -%c\n",
270                                         dv->devname, dv->disposition);
271                                 return 1;
272                         }
273                         for (; j < array.raid_disks + array.nr_disks ; j++) {
274                                 disc.number = j;
275                                 if (ioctl(fd, GET_DISK_INFO, &disc))
276                                         continue;
277                                 if (disc.major == 0 && disc.minor == 0)
278                                         continue;
279                                 if ((disc.state & 1) == 0) /* faulty */
280                                         continue;
281                                 stb.st_rdev = makedev(disc.major, disc.minor);
282                                 next = dv;
283                                 jnext = j+1;
284                                 sprintf(dvname,"%d:%d", disc.major, disc.minor);
285                                 dnprintable = dvname;
286                                 break;
287                         }
288                         if (jnext == 0)
289                                 continue;
290                 } else if (strcmp(dv->devname, "detached") == 0) {
291                         if (dv->disposition != 'r' && dv->disposition != 'f') {
292                                 fprintf(stderr, Name ": %s only meaningful "
293                                         "with -r of -f, not -%c\n",
294                                         dv->devname, dv->disposition);
295                                 return 1;
296                         }
297                         for (; j < array.raid_disks + array.nr_disks; j++) {
298                                 int sfd;
299                                 disc.number = j;
300                                 if (ioctl(fd, GET_DISK_INFO, &disc))
301                                         continue;
302                                 if (disc.major == 0 && disc.minor == 0)
303                                         continue;
304                                 sprintf(dvname,"%d:%d", disc.major, disc.minor);
305                                 sfd = dev_open(dvname, O_RDONLY);
306                                 if (sfd >= 0) {
307                                         close(sfd);
308                                         continue;
309                                 }
310                                 if (dv->disposition == 'f' &&
311                                     (disc.state & 1) == 1) /* already faulty */
312                                         continue;
313                                 if (errno != ENXIO)
314                                         continue;
315                                 stb.st_rdev = makedev(disc.major, disc.minor);
316                                 next = dv;
317                                 jnext = j+1;
318                                 dnprintable = dvname;
319                                 break;
320                         }
321                         if (jnext == 0)
322                                 continue;
323                 } else {
324                         j = 0;
325
326                         if (stat(dv->devname, &stb)) {
327                                 fprintf(stderr, Name ": cannot find %s: %s\n",
328                                         dv->devname, strerror(errno));
329                                 return 1;
330                         }
331                         if ((stb.st_mode & S_IFMT) != S_IFBLK) {
332                                 fprintf(stderr, Name ": %s is not a "
333                                         "block device.\n",
334                                         dv->devname);
335                                 return 1;
336                         }
337                 }
338                 switch(dv->disposition){
339                 default:
340                         fprintf(stderr, Name ": internal error - devmode[%s]=%d\n",
341                                 dv->devname, dv->disposition);
342                         return 1;
343                 case 'a':
344                         /* add the device */
345                         if (tst->subarray[0]) {
346                                 fprintf(stderr, Name ": Cannot add disks to a"
347                                         " \'member\' array, perform this"
348                                         " operation on the parent container\n");
349                                 return 1;
350                         }
351                         /* Make sure it isn't in use (in 2.6 or later) */
352                         tfd = open(dv->devname, O_RDONLY|O_EXCL|O_DIRECT);
353                         if (tfd < 0) {
354                                 fprintf(stderr, Name ": Cannot open %s: %s\n",
355                                         dv->devname, strerror(errno));
356                                 return 1;
357                         }
358                         remove_partitions(tfd);
359
360                         st = dup_super(tst);
361
362                         if (array.not_persistent==0)
363                                 st->ss->load_super(st, tfd, NULL);
364
365                         if (!get_dev_size(tfd, dv->devname, &ldsize)) {
366                                 close(tfd);
367                                 return 1;
368                         }
369                         close(tfd);
370
371
372                         if (!tst->ss->external &&
373                             array.major_version == 0 &&
374                             md_get_version(fd)%100 < 2) {
375                                 if (ioctl(fd, HOT_ADD_DISK,
376                                           (unsigned long)stb.st_rdev)==0) {
377                                         if (verbose >= 0)
378                                                 fprintf(stderr, Name ": hot added %s\n",
379                                                         dv->devname);
380                                         continue;
381                                 }
382
383                                 fprintf(stderr, Name ": hot add failed for %s: %s\n",
384                                         dv->devname, strerror(errno));
385                                 return 1;
386                         }
387
388                         if (array.not_persistent == 0) {
389
390                                 /* Make sure device is large enough */
391                                 if (tst->ss->avail_size(tst, ldsize/512) <
392                                     array_size) {
393                                         fprintf(stderr, Name ": %s not large enough to join array\n",
394                                                 dv->devname);
395                                         return 1;
396                                 }
397
398                                 /* need to find a sample superblock to copy, and
399                                  * a spare slot to use
400                                  */
401                                 for (j = 0; j < tst->max_devs; j++) {
402                                         char *dev;
403                                         int dfd;
404                                         disc.number = j;
405                                         if (ioctl(fd, GET_DISK_INFO, &disc))
406                                                 continue;
407                                         if (disc.major==0 && disc.minor==0)
408                                                 continue;
409                                         if ((disc.state & 4)==0) continue; /* sync */
410                                         /* Looks like a good device to try */
411                                         dev = map_dev(disc.major, disc.minor, 1);
412                                         if (!dev) continue;
413                                         dfd = dev_open(dev, O_RDONLY);
414                                         if (dfd < 0) continue;
415                                         if (tst->ss->load_super(tst, dfd,
416                                                                 NULL)) {
417                                                 close(dfd);
418                                                 continue;
419                                         }
420                                         close(dfd);
421                                         break;
422                                 }
423                                 if (!tst->sb) {
424                                         fprintf(stderr, Name ": cannot find valid superblock in this array - HELP\n");
425                                         return 1;
426                                 }
427                                 /* Possibly this device was recently part of the array
428                                  * and was temporarily removed, and is now being re-added.
429                                  * If so, we can simply re-add it.
430                                  */
431                                 tst->ss->uuid_from_super(tst, duuid);
432
433                                 /* re-add doesn't work for version-1 superblocks
434                                  * before 2.6.18 :-(
435                                  */
436                                 if (array.major_version == 1 &&
437                                     get_linux_version() <= 2006018)
438                                         ;
439                                 else if (st->sb) {
440                                         st->ss->uuid_from_super(st, ouuid);
441                                         if (memcmp(duuid, ouuid, sizeof(ouuid))==0) {
442                                                 /* looks close enough for now.  Kernel
443                                                  * will worry about whether a bitmap
444                                                  * based reconstruction is possible.
445                                                  */
446                                                 struct mdinfo mdi;
447                                                 st->ss->getinfo_super(st, &mdi);
448                                                 disc.major = major(stb.st_rdev);
449                                                 disc.minor = minor(stb.st_rdev);
450                                                 disc.number = mdi.disk.number;
451                                                 disc.raid_disk = mdi.disk.raid_disk;
452                                                 disc.state = mdi.disk.state;
453                                                 if (dv->writemostly)
454                                                         disc.state |= 1 << MD_DISK_WRITEMOSTLY;
455                                                 if (ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
456                                                         if (verbose >= 0)
457                                                                 fprintf(stderr, Name ": re-added %s\n", dv->devname);
458                                                         continue;
459                                                 }
460                                                 /* fall back on normal-add */
461                                         }
462                                 }
463                         } else {
464                                 /* non-persistent. Must ensure that new drive
465                                  * is at least array.size big.
466                                  */
467                                 if (ldsize/512 < array_size) {
468                                         fprintf(stderr, Name ": %s not large enough to join array\n",
469                                                 dv->devname);
470                                         return 1;
471                                 }
472                         }
473                         /* in 2.6.17 and earlier, version-1 superblocks won't
474                          * use the number we write, but will choose a free number.
475                          * we must choose the same free number, which requires
476                          * starting at 'raid_disks' and counting up
477                          */
478                         for (j = array.raid_disks; j< tst->max_devs; j++) {
479                                 disc.number = j;
480                                 if (ioctl(fd, GET_DISK_INFO, &disc))
481                                         break;
482                                 if (disc.major==0 && disc.minor==0)
483                                         break;
484                                 if (disc.state & 8) /* removed */
485                                         break;
486                         }
487                         disc.major = major(stb.st_rdev);
488                         disc.minor = minor(stb.st_rdev);
489                         disc.number =j;
490                         disc.state = 0;
491                         if (array.not_persistent==0) {
492                                 int dfd;
493                                 if (dv->writemostly)
494                                         disc.state |= 1 << MD_DISK_WRITEMOSTLY;
495                                 dfd = open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
496                                 tst->ss->add_to_super(tst, &disc, dfd,
497                                                       dv->devname);
498                                 /* write_init_super will close 'dfd' */
499                                 if (tst->ss->write_init_super(tst))
500                                         return 1;
501                         } else if (dv->re_add) {
502                                 /*  this had better be raid1.
503                                  * As we are "--re-add"ing we must find a spare slot
504                                  * to fill.
505                                  */
506                                 char *used = malloc(array.raid_disks);
507                                 memset(used, 0, array.raid_disks);
508                                 for (j=0; j< tst->max_devs; j++) {
509                                         mdu_disk_info_t disc2;
510                                         disc2.number = j;
511                                         if (ioctl(fd, GET_DISK_INFO, &disc2))
512                                                 continue;
513                                         if (disc2.major==0 && disc2.minor==0)
514                                                 continue;
515                                         if (disc2.state & 8) /* removed */
516                                                 continue;
517                                         if (disc2.raid_disk < 0)
518                                                 continue;
519                                         if (disc2.raid_disk > array.raid_disks)
520                                                 continue;
521                                         used[disc2.raid_disk] = 1;
522                                 }
523                                 for (j=0 ; j<array.raid_disks; j++)
524                                         if (!used[j]) {
525                                                 disc.raid_disk = j;
526                                                 disc.state |= (1<<MD_DISK_SYNC);
527                                                 break;
528                                         }
529                         }
530                         if (dv->writemostly)
531                                 disc.state |= (1 << MD_DISK_WRITEMOSTLY);
532                         if (ioctl(fd,ADD_NEW_DISK, &disc)) {
533                                 fprintf(stderr, Name ": add new device failed for %s as %d: %s\n",
534                                         dv->devname, j, strerror(errno));
535                                 return 1;
536                         }
537                         if (verbose >= 0)
538                                 fprintf(stderr, Name ": added %s\n", dv->devname);
539                         break;
540
541                 case 'r':
542                         /* hot remove */
543                         if (tst->subarray[0]) {
544                                 fprintf(stderr, Name ": Cannot remove disks from a"
545                                         " \'member\' array, perform this"
546                                         " operation on the parent container\n");
547                                 return 1;
548                         }
549                         if (tst->ss->external) {
550                                 /* To remove a device from a container, we must
551                                  * check that it isn't in use in an array.
552                                  * This involves looking in the 'holders'
553                                  * directory - there must be just one entry,
554                                  * the container.
555                                  * To ensure that it doesn't get used as a
556                                  * hold spare while we are checking, we
557                                  * get an O_EXCL open on the container
558                                  */
559                                 int dnum = fd2devnum(fd);
560                                 lfd = open_dev_excl(dnum);
561                                 if (lfd < 0) {
562                                         fprintf(stderr, Name
563                                                 ": Cannot get exclusive access "
564                                                 " to container - odd\n");
565                                         return 1;
566                                 }
567                                 if (!sysfs_unique_holder(dnum, stb.st_rdev)) {
568                                         fprintf(stderr, Name
569                                                 ": %s is %s, cannot remove.\n",
570                                                 dnprintable,
571                                                 errno == EEXIST ? "still in use":
572                                                 "not a member");
573                                         close(lfd);
574                                         return 1;
575                                 }
576                         }
577                         /* FIXME check that it is a current member */
578                         err = ioctl(fd, HOT_REMOVE_DISK, (unsigned long)stb.st_rdev);
579                         if (err && errno == ENODEV) {
580                                 /* Old kernels rejected this if no personality
581                                  * registered */
582                                 struct mdinfo *sra = sysfs_read(fd, 0, GET_DEVS);
583                                 struct mdinfo *dv = NULL;
584                                 if (sra)
585                                         dv = sra->devs;
586                                 for ( ; dv ; dv=dv->next)
587                                         if (dv->disk.major == major(stb.st_rdev) &&
588                                             dv->disk.minor == minor(stb.st_rdev))
589                                                 break;
590                                 if (dv)
591                                         err = sysfs_set_str(sra, dv,
592                                                             "state", "remove");
593                                 else
594                                         err = -1;
595                                 if (sra)
596                                         sysfs_free(sra);
597                         }
598                         if (err) {
599                                 fprintf(stderr, Name ": hot remove failed "
600                                         "for %s: %s\n", dnprintable,
601                                         strerror(errno));
602                                 if (lfd >= 0)
603                                         close(lfd);
604                                 return 1;
605                         }
606                         close(lfd);
607                         if (verbose >= 0)
608                                 fprintf(stderr, Name ": hot removed %s\n",
609                                         dnprintable);
610                         break;
611
612                 case 'f': /* set faulty */
613                         /* FIXME check current member */
614                         if (ioctl(fd, SET_DISK_FAULTY, (unsigned long) stb.st_rdev)) {
615                                 fprintf(stderr, Name ": set device faulty failed for %s:  %s\n",
616                                         dnprintable, strerror(errno));
617                                 return 1;
618                         }
619                         if (verbose >= 0)
620                                 fprintf(stderr, Name ": set %s faulty in %s\n",
621                                         dnprintable, devname);
622                         break;
623                 }
624         }
625         return 0;
626
627 }
628
629 int autodetect(void)
630 {
631         /* Open any md device, and issue the RAID_AUTORUN ioctl */
632         int rv = 1;
633         int fd = dev_open("9:0", O_RDONLY);
634         if (fd >= 0) {
635                 if (ioctl(fd, RAID_AUTORUN, 0) == 0)
636                         rv = 0;
637                 close(fd);
638         }
639         return rv;
640 }
641 #endif