]> git.ipfire.org Git - thirdparty/mdadm.git/blob - Assemble.c
mdadm-1.6.0
[thirdparty/mdadm.git] / Assemble.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2001-2002 Neil Brown <neilb@cse.unsw.edu.au>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@cse.unsw.edu.au>
23 * Paper: Neil Brown
24 * School of Computer Science and Engineering
25 * The University of New South Wales
26 * Sydney, 2052
27 * Australia
28 */
29
30 #include "mdadm.h"
31 #include "md_u.h"
32 #include "md_p.h"
33
34 int Assemble(char *mddev, int mdfd,
35 mddev_ident_t ident, char *conffile,
36 mddev_dev_t devlist,
37 int readonly, int runstop,
38 char *update,
39 int verbose, int force)
40 {
41 /*
42 * The task of Assemble is to find a collection of
43 * devices that should (according to their superblocks)
44 * form an array, and to give this collection to the MD driver.
45 * In Linux-2.4 and later, this involves submitting a
46 * SET_ARRAY_INFO ioctl with no arg - to prepare
47 * the array - and then submit a number of
48 * ADD_NEW_DISK ioctls to add disks into
49 * the array. Finally RUN_ARRAY might
50 * be submitted to start the array.
51 *
52 * Much of the work of Assemble is in finding and/or
53 * checking the disks to make sure they look right.
54 *
55 * If mddev is not set, then scan must be and we
56 * read through the config file for dev+uuid mapping
57 * We recurse, setting mddev, for each device that
58 * - isn't running
59 * - has a valid uuid (or any uuid if !uuidset
60 *
61 * If mddev is set, we try to determine state of md.
62 * check version - must be at least 0.90.0
63 * check kernel version. must be at least 2.4.
64 * If not, we can possibly fall back on START_ARRAY
65 * Try to GET_ARRAY_INFO.
66 * If possible, give up
67 * If not, try to STOP_ARRAY just to make sure
68 *
69 * If !uuidset and scan, look in conf-file for uuid
70 * If not found, give up
71 * If !devlist and scan and uuidset, get list of devs from conf-file
72 *
73 * For each device:
74 * Check superblock - discard if bad
75 * Check uuid (set if we don't have one) - discard if no match
76 * Check superblock similarity if we have a superblock - discard if different
77 * Record events, devicenum, utime
78 * This should give us a list of devices for the array
79 * We should collect the most recent event and utime numbers
80 *
81 * Count disks with recent enough event count
82 * While force && !enough disks
83 * Choose newest rejected disks, update event count
84 * mark clean and rewrite superblock
85 * If recent kernel:
86 * SET_ARRAY_INFO
87 * foreach device with recent events : ADD_NEW_DISK
88 * if runstop == 1 || "enough" disks and runstop==0 -> RUN_ARRAY
89 * If old kernel:
90 * Check the device numbers in superblock are right
91 * update superblock if any changes
92 * START_ARRAY
93 *
94 */
95 int old_linux = 0;
96 int vers;
97 mdu_array_info_t array;
98 mdp_super_t first_super, super;
99 struct {
100 char *devname;
101 unsigned int major, minor;
102 unsigned int oldmajor, oldminor;
103 long long events;
104 time_t utime;
105 int uptodate;
106 int state;
107 int raid_disk;
108 } *devices;
109 int *best = NULL; /* indexed by raid_disk */
110 unsigned int bestcnt = 0;
111 int devcnt = 0;
112 unsigned int okcnt, sparecnt;
113 unsigned int req_cnt;
114 unsigned int i;
115 int most_recent = 0;
116 int chosen_drive;
117 int change = 0;
118 int inargv = 0;
119 int start_partial_ok = force || devlist==NULL;
120 unsigned int num_devs;
121 mddev_dev_t tmpdev;
122
123 vers = md_get_version(mdfd);
124 if (vers <= 0) {
125 fprintf(stderr, Name ": %s appears not to be an md device.\n", mddev);
126 return 1;
127 }
128 if (vers < 9000) {
129 fprintf(stderr, Name ": Assemble requires driver version 0.90.0 or later.\n"
130 " Upgrade your kernel or try --build\n");
131 return 1;
132 }
133 if (get_linux_version() < 2004000)
134 old_linux = 1;
135
136 if (ioctl(mdfd, GET_ARRAY_INFO, &array)>=0) {
137 fprintf(stderr, Name ": device %s already active - cannot assemble it\n",
138 mddev);
139 return 1;
140 }
141 ioctl(mdfd, STOP_ARRAY, NULL); /* just incase it was started but has no content */
142
143 /*
144 * If any subdevs are listed, then any that don't
145 * match ident are discarded. Remainder must all match and
146 * become the array.
147 * If no subdevs, then we scan all devices in the config file, but
148 * there must be something in the identity
149 */
150
151 if (!devlist &&
152 ident->uuid_set == 0 &&
153 ident->super_minor < 0 &&
154 ident->devices == NULL) {
155 fprintf(stderr, Name ": No identity information available for %s - cannot assemble.\n",
156 mddev);
157 return 1;
158 }
159 if (devlist == NULL)
160 devlist = conf_get_devs(conffile);
161 else inargv = 1;
162
163 tmpdev = devlist; num_devs = 0;
164 while (tmpdev) {
165 num_devs++;
166 tmpdev = tmpdev->next;
167 }
168 best = malloc(num_devs * sizeof(*best));
169 devices = malloc(num_devs * sizeof(*devices));
170
171 first_super.md_magic = 0;
172 for (i=0; i<num_devs; i++)
173 best[i] = -1;
174
175 if (verbose)
176 fprintf(stderr, Name ": looking for devices for %s\n",
177 mddev);
178
179 while ( devlist) {
180 char *devname;
181 int this_uuid[4];
182 int dfd;
183 struct stat stb;
184 int havesuper=0;
185
186 devname = devlist->devname;
187 devlist = devlist->next;
188
189 if (ident->devices &&
190 !match_oneof(ident->devices, devname)) {
191 if (inargv || verbose)
192 fprintf(stderr, Name ": %s is not one of %s\n", devname, ident->devices);
193 continue;
194 }
195
196 dfd = open(devname, O_RDONLY, 0);
197 if (dfd < 0) {
198 if (inargv || verbose)
199 fprintf(stderr, Name ": cannot open device %s: %s\n",
200 devname, strerror(errno));
201 } else if (fstat(dfd, &stb)< 0) {
202 /* Impossible! */
203 fprintf(stderr, Name ": fstat failed for %s: %s\n",
204 devname, strerror(errno));
205 close(dfd);
206 } else if ((stb.st_mode & S_IFMT) != S_IFBLK) {
207 fprintf(stderr, Name ": %s is not a block device.\n",
208 devname);
209 close(dfd);
210 } else if (load_super(dfd, &super)) {
211 if (inargv || verbose)
212 fprintf( stderr, Name ": no RAID superblock on %s\n",
213 devname);
214 close(dfd);
215 } else {
216 havesuper =1;
217 uuid_from_super(this_uuid, &super);
218 close(dfd);
219 }
220
221 if (ident->uuid_set &&
222 (!havesuper || same_uuid(this_uuid, ident->uuid)==0)) {
223 if (inargv || verbose)
224 fprintf(stderr, Name ": %s has wrong uuid.\n",
225 devname);
226 continue;
227 }
228 if (ident->super_minor != UnSet &&
229 (!havesuper || ident->super_minor != super.md_minor)) {
230 if (inargv || verbose)
231 fprintf(stderr, Name ": %s has wrong super-minor.\n",
232 devname);
233 continue;
234 }
235 if (ident->level != UnSet &&
236 (!havesuper|| ident->level != (int)super.level)) {
237 if (inargv || verbose)
238 fprintf(stderr, Name ": %s has wrong raid level.\n",
239 devname);
240 continue;
241 }
242 if (ident->raid_disks != UnSet &&
243 (!havesuper || ident->raid_disks!= super.raid_disks)) {
244 if (inargv || verbose)
245 fprintf(stderr, Name ": %s requires wrong number of drives.\n",
246 devname);
247 continue;
248 }
249
250 /* If we are this far, then we are commited to this device.
251 * If the super_block doesn't exist, or doesn't match others,
252 * then we cannot continue
253 */
254
255 if (!havesuper) {
256 fprintf(stderr, Name ": %s has no superblock - assembly aborted\n",
257 devname);
258 return 1;
259 }
260 if (compare_super(&first_super, &super)) {
261 fprintf(stderr, Name ": superblock on %s doesn't match others - assembly aborted\n",
262 devname);
263 return 1;
264 }
265
266
267 /* this is needed until we get a more relaxed super block format */
268 if (devcnt >= MD_SB_DISKS) {
269 fprintf(stderr, Name ": ouch - too many devices appear to be in this array. Ignoring %s\n",
270 devname);
271 continue;
272 }
273
274 /* looks like a good enough match to update the super block if needed */
275 if (update) {
276 if (strcmp(update, "sparc2.2")==0 ) {
277 /* 2.2 sparc put the events in the wrong place
278 * So we copy the tail of the superblock
279 * up 4 bytes before continuing
280 */
281 __u32 *sb32 = (__u32*)&super;
282 memcpy(sb32+MD_SB_GENERIC_CONSTANT_WORDS+7,
283 sb32+MD_SB_GENERIC_CONSTANT_WORDS+7+1,
284 (MD_SB_WORDS - (MD_SB_GENERIC_CONSTANT_WORDS+7+1))*4);
285 fprintf (stderr, Name ": adjusting superblock of %s for 2.2/sparc compatability.\n",
286 devname);
287 }
288 if (strcmp(update, "super-minor") ==0) {
289 struct stat stb2;
290 fstat(mdfd, &stb2);
291 super.md_minor = MINOR(stb2.st_rdev);
292 if (verbose)
293 fprintf(stderr, Name ": updating superblock of %s with minor number %d\n",
294 devname, super.md_minor);
295 }
296 if (strcmp(update, "summaries") == 0) {
297 /* set nr_disks, active_disks, working_disks,
298 * failed_disks, spare_disks based on disks[]
299 * array in superblock.
300 * Also make sure extra slots aren't 'failed'
301 */
302 super.nr_disks = super.active_disks =
303 super.working_disks = super.failed_disks =
304 super.spare_disks = 0;
305 for (i=0; i < MD_SB_DISKS ; i++)
306 if (super.disks[i].major ||
307 super.disks[i].minor) {
308 int state = super.disks[i].state;
309 if (state & (1<<MD_DISK_REMOVED))
310 continue;
311 super.nr_disks++;
312 if (state & (1<<MD_DISK_ACTIVE))
313 super.active_disks++;
314 if (state & (1<<MD_DISK_FAULTY))
315 super.failed_disks++;
316 else
317 super.working_disks++;
318 if (state == 0)
319 super.spare_disks++;
320 } else if (i >= super.raid_disks && super.disks[i].number == 0)
321 super.disks[i].state = 0;
322 }
323 super.sb_csum = calc_sb_csum(&super);
324 dfd = open(devname, O_RDWR, 0);
325 if (dfd < 0)
326 fprintf(stderr, Name ": Cannot open %s for superblock update\n",
327 devname);
328 else if (store_super(dfd, &super))
329 fprintf(stderr, Name ": Could not re-write superblock on %s.\n",
330 devname);
331 if (dfd >= 0)
332 close(dfd);
333 }
334
335 if (verbose)
336 fprintf(stderr, Name ": %s is identified as a member of %s, slot %d.\n",
337 devname, mddev, super.this_disk.raid_disk);
338 devices[devcnt].devname = devname;
339 devices[devcnt].major = MAJOR(stb.st_rdev);
340 devices[devcnt].minor = MINOR(stb.st_rdev);
341 devices[devcnt].oldmajor = super.this_disk.major;
342 devices[devcnt].oldminor = super.this_disk.minor;
343 devices[devcnt].events = md_event(&super);
344 devices[devcnt].utime = super.utime;
345 devices[devcnt].raid_disk = super.this_disk.raid_disk;
346 devices[devcnt].uptodate = 0;
347 devices[devcnt].state = super.this_disk.state;
348 if (most_recent < devcnt) {
349 if (devices[devcnt].events
350 > devices[most_recent].events)
351 most_recent = devcnt;
352 }
353 if ((int)super.level == -4)
354 /* with multipath, the raid_disk from the superblock is meaningless */
355 i = devcnt;
356 else
357 i = devices[devcnt].raid_disk;
358 if (i < 10000) {
359 if (i >= bestcnt) {
360 unsigned int newbestcnt = i+10;
361 int *newbest = malloc(sizeof(int)*newbestcnt);
362 unsigned int c;
363 for (c=0; c < newbestcnt; c++)
364 if (c < bestcnt)
365 newbest[c] = best[c];
366 else
367 newbest[c] = -1;
368 if (best)free(best);
369 best = newbest;
370 bestcnt = newbestcnt;
371 }
372 if (best[i] == -1
373 || devices[best[i]].events < devices[devcnt].events)
374 best[i] = devcnt;
375 }
376 devcnt++;
377 }
378
379 if (devcnt == 0) {
380 fprintf(stderr, Name ": no devices found for %s\n",
381 mddev);
382 return 1;
383 }
384 /* now we have some devices that might be suitable.
385 * I wonder how many
386 */
387 okcnt = 0;
388 sparecnt=0;
389 for (i=0; i< bestcnt ;i++) {
390 int j = best[i];
391 int event_margin = !force;
392 if (j < 0) continue;
393 /* note: we ignore error flags in multipath arrays
394 * as they don't make sense
395 */
396 if ((int)first_super.level != -4)
397 if (!(devices[j].state & (1<<MD_DISK_SYNC))) {
398 if (!(devices[j].state & (1<<MD_DISK_FAULTY)))
399 sparecnt++;
400 continue;
401 }
402 if (devices[j].events+event_margin >=
403 devices[most_recent].events) {
404 devices[j].uptodate = 1;
405 if (i < first_super.raid_disks)
406 okcnt++;
407 else
408 sparecnt++;
409 }
410 }
411 while (force && !enough(first_super.level, first_super.raid_disks, okcnt)) {
412 /* Choose the newest best drive which is
413 * not up-to-date, update the superblock
414 * and add it.
415 */
416 int fd;
417 chosen_drive = -1;
418 for (i=0; i<first_super.raid_disks && i < bestcnt; i++) {
419 int j = best[i];
420 if (j>=0 &&
421 !devices[j].uptodate &&
422 devices[j].events > 0 &&
423 (chosen_drive < 0 ||
424 devices[j].events > devices[chosen_drive].events))
425 chosen_drive = j;
426 }
427 if (chosen_drive < 0)
428 break;
429 fprintf(stderr, Name ": forcing event count in %s(%d) from %d upto %d\n",
430 devices[chosen_drive].devname, devices[chosen_drive].raid_disk,
431 (int)(devices[chosen_drive].events),
432 (int)(devices[most_recent].events));
433 fd = open(devices[chosen_drive].devname, O_RDWR);
434 if (fd < 0) {
435 fprintf(stderr, Name ": Couldn't open %s for write - not updating\n",
436 devices[chosen_drive].devname);
437 devices[chosen_drive].events = 0;
438 continue;
439 }
440 if (load_super(fd, &super)) {
441 close(fd);
442 fprintf(stderr, Name ": RAID superblock disappeared from %s - not updating.\n",
443 devices[chosen_drive].devname);
444 devices[chosen_drive].events = 0;
445 continue;
446 }
447 super.events_hi = (devices[most_recent].events>>32)&0xFFFFFFFF;
448 super.events_lo = (devices[most_recent].events)&0xFFFFFFFF;
449 if (super.level == 5 || super.level == 4) {
450 /* need to force clean */
451 super.state = (1<<MD_SB_CLEAN);
452 }
453 super.sb_csum = calc_sb_csum(&super);
454 /*DRYRUN*/ if (store_super(fd, &super)) {
455 close(fd);
456 fprintf(stderr, Name ": Could not re-write superblock on %s\n",
457 devices[chosen_drive].devname);
458 devices[chosen_drive].events = 0;
459 continue;
460 }
461 close(fd);
462 devices[chosen_drive].events = devices[most_recent].events;
463 devices[chosen_drive].uptodate = 1;
464 okcnt++;
465 }
466
467 /* Now we want to look at the superblock which the kernel will base things on
468 * and compare the devices that we think are working with the devices that the
469 * superblock thinks are working.
470 * If there are differences and --force is given, then update this chosen
471 * superblock.
472 */
473 chosen_drive = -1;
474 for (i=0; chosen_drive < 0 && i<bestcnt; i++) {
475 int j = best[i];
476 int fd;
477 if (j<0)
478 continue;
479 if (!devices[j].uptodate)
480 continue;
481 chosen_drive = j;
482 if ((fd=open(devices[j].devname, O_RDONLY))< 0) {
483 fprintf(stderr, Name ": Cannot open %s: %s\n",
484 devices[j].devname, strerror(errno));
485 return 1;
486 }
487 if (load_super(fd, &super)) {
488 close(fd);
489 fprintf(stderr, Name ": RAID superblock has disappeared from %s\n",
490 devices[j].devname);
491 return 1;
492 }
493 close(fd);
494 }
495
496 for (i=0; i<bestcnt; i++) {
497 int j = best[i];
498 unsigned int desired_state;
499
500 if (i < super.raid_disks)
501 desired_state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC);
502 else
503 desired_state = 0;
504
505 if (j<0)
506 continue;
507 if (!devices[j].uptodate)
508 continue;
509 #if 0
510 This doesnt work yet
511 if (devices[j].major != super.disks[i].major ||
512 devices[j].minor != super.disks[i].minor) {
513 change |= 1;
514 super.disks[i].major = devices[j].major;
515 super.disks[i].minor = devices[j].minor;
516 }
517 #endif
518 if (devices[j].oldmajor != super.disks[i].major ||
519 devices[j].oldminor != super.disks[i].minor) {
520 change |= 2;
521 super.disks[i].major = devices[j].oldmajor;
522 super.disks[i].minor = devices[j].oldminor;
523 }
524 if (devices[j].uptodate &&
525 (super.disks[i].state != desired_state)) {
526 if (force) {
527 fprintf(stderr, Name ": "
528 "clearing FAULTY flag for device %d in %s for %s\n",
529 j, mddev, devices[j].devname);
530 super.disks[i].state = desired_state;
531 change |= 2;
532 } else {
533 fprintf(stderr, Name ": "
534 "device %d in %s has wrong state in superblock, but %s seems ok\n",
535 i, mddev, devices[j].devname);
536 }
537 }
538 if (!devices[j].uptodate &&
539 !(super.disks[i].state & (1 << MD_DISK_FAULTY))) {
540 fprintf(stderr, Name ": devices %d of %s is not marked FAULTY in superblock, but cannot be found\n",
541 i, mddev);
542 }
543 }
544 if (force && (super.level == 4 || super.level == 5) &&
545 okcnt == super.raid_disks-1) {
546 super.state = (1<< MD_SB_CLEAN);
547 change |= 2;
548 }
549
550 if ((force && (change & 2))
551 || (old_linux && (change & 1))) {
552 int fd;
553 super.sb_csum = calc_sb_csum(&super);
554 fd = open(devices[chosen_drive].devname, O_RDWR);
555 if (fd < 0) {
556 fprintf(stderr, Name ": Could open %s for write - cannot Assemble array.\n",
557 devices[chosen_drive].devname);
558 return 1;
559 }
560 if (store_super(fd, &super)) {
561 close(fd);
562 fprintf(stderr, Name ": Could not re-write superblock on %s\n",
563 devices[chosen_drive].devname);
564 return 1;
565 }
566 close(fd);
567 change = 0;
568 }
569
570 /* count number of in-sync devices according to the superblock.
571 * We must have this number to start the array without -s or -R
572 */
573 req_cnt = 0;
574 for (i=0; i<MD_SB_DISKS; i++)
575 if ((first_super.disks[i].state & (1<<MD_DISK_SYNC)) &&
576 (first_super.disks[i].state & (1<<MD_DISK_ACTIVE)) &&
577 !(first_super.disks[i].state & (1<<MD_DISK_FAULTY)))
578 req_cnt ++;
579
580
581 /* Almost ready to actually *do* something */
582 if (!old_linux) {
583 if (ioctl(mdfd, SET_ARRAY_INFO, NULL) != 0) {
584 fprintf(stderr, Name ": SET_ARRAY_INFO failed for %s: %s\n",
585 mddev, strerror(errno));
586 return 1;
587 }
588 /* First, add the raid disks, but add the chosen one last */
589 for (i=0; i<= bestcnt; i++) {
590 int j;
591 if (i < bestcnt) {
592 j = best[i];
593 if (j == chosen_drive)
594 continue;
595 } else
596 j = chosen_drive;
597
598 if (j >= 0 /* && devices[j].uptodate */) {
599 mdu_disk_info_t disk;
600 memset(&disk, 0, sizeof(disk));
601 disk.major = devices[j].major;
602 disk.minor = devices[j].minor;
603 if (ioctl(mdfd, ADD_NEW_DISK, &disk)!=0) {
604 fprintf(stderr, Name ": failed to add %s to %s: %s\n",
605 devices[j].devname,
606 mddev,
607 strerror(errno));
608 if (i < first_super.raid_disks)
609 okcnt--;
610 else
611 sparecnt--;
612 } else if (verbose)
613 fprintf(stderr, Name ": added %s to %s as %d\n",
614 devices[j].devname, mddev, devices[j].raid_disk);
615 } else if (verbose && i < first_super.raid_disks)
616 fprintf(stderr, Name ": no uptodate device for slot %d of %s\n",
617 i, mddev);
618 }
619
620 if (runstop == 1 ||
621 (runstop == 0 &&
622 ( enough(first_super.level, first_super.raid_disks, okcnt) &&
623 (okcnt >= req_cnt || start_partial_ok)
624 ))) {
625 if (ioctl(mdfd, RUN_ARRAY, NULL)==0) {
626 fprintf(stderr, Name ": %s has been started with %d drive%s",
627 mddev, okcnt, okcnt==1?"":"s");
628 if (okcnt < first_super.raid_disks)
629 fprintf(stderr, " (out of %d)", first_super.raid_disks);
630 if (sparecnt)
631 fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s");
632 fprintf(stderr, ".\n");
633 return 0;
634 }
635 fprintf(stderr, Name ": failed to RUN_ARRAY %s: %s\n",
636 mddev, strerror(errno));
637 return 1;
638 }
639 if (runstop == -1) {
640 fprintf(stderr, Name ": %s assembled from %d drive%s, but not started.\n",
641 mddev, okcnt, okcnt==1?"":"s");
642 return 0;
643 }
644 fprintf(stderr, Name ": %s assembled from %d drive%s", mddev, okcnt, okcnt==1?"":"s");
645 if (sparecnt)
646 fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s");
647 if (!enough(first_super.level, first_super.raid_disks, okcnt))
648 fprintf(stderr, " - not enough to start the array.\n");
649 else {
650 if (req_cnt == first_super.raid_disks)
651 fprintf(stderr, " - need all %d to start it", req_cnt);
652 else
653 fprintf(stderr, " - need %d of %d to start", req_cnt, first_super.raid_disks);
654 fprintf(stderr, " (use --run to insist).\n");
655 }
656 return 1;
657 } else {
658 /* The "chosen_drive" is a good choice, and if necessary, the superblock has
659 * been updated to point to the current locations of devices.
660 * so we can just start the array
661 */
662 unsigned long dev;
663 dev = MKDEV(devices[chosen_drive].major,
664 devices[chosen_drive].minor);
665 if (ioctl(mdfd, START_ARRAY, dev)) {
666 fprintf(stderr, Name ": Cannot start array: %s\n",
667 strerror(errno));
668 }
669
670 }
671 return 0;
672 }