]> git.ipfire.org Git - thirdparty/mdadm.git/blob - Assemble.c
794b00d43421909cc6f91582aa5c4fc777abd2ed
[thirdparty/mdadm.git] / Assemble.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2001-2002 Neil Brown <neilb@cse.unsw.edu.au>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neilb@cse.unsw.edu.au>
23 * Paper: Neil Brown
24 * School of Computer Science and Engineering
25 * The University of New South Wales
26 * Sydney, 2052
27 * Australia
28 */
29
30 #include "mdadm.h"
31 #include "md_u.h"
32 #include "md_p.h"
33
34 int Assemble(char *mddev, int mdfd,
35 mddev_ident_t ident, char *conffile,
36 mddev_dev_t devlist,
37 int readonly, int runstop,
38 char *update,
39 int verbose, int force)
40 {
41 /*
42 * The task of Assemble is to find a collection of
43 * devices that should (according to their superblocks)
44 * form an array, and to give this collection to the MD driver.
45 * In Linux-2.4 and later, this involves submitting a
46 * SET_ARRAY_INFO ioctl with no arg - to prepare
47 * the array - and then submit a number of
48 * ADD_NEW_DISK ioctls to add disks into
49 * the array. Finally RUN_ARRAY might
50 * be submitted to start the array.
51 *
52 * Much of the work of Assemble is in finding and/or
53 * checking the disks to make sure they look right.
54 *
55 * If mddev is not set, then scan must be and we
56 * read through the config file for dev+uuid mapping
57 * We recurse, setting mddev, for each device that
58 * - isn't running
59 * - has a valid uuid (or any uuid if !uuidset
60 *
61 * If mddev is set, we try to determine state of md.
62 * check version - must be at least 0.90.0
63 * check kernel version. must be at least 2.4.
64 * If not, we can possibly fall back on START_ARRAY
65 * Try to GET_ARRAY_INFO.
66 * If possible, give up
67 * If not, try to STOP_ARRAY just to make sure
68 *
69 * If !uuidset and scan, look in conf-file for uuid
70 * If not found, give up
71 * If !devlist and scan and uuidset, get list of devs from conf-file
72 *
73 * For each device:
74 * Check superblock - discard if bad
75 * Check uuid (set if we don't have one) - discard if no match
76 * Check superblock similarity if we have a superblock - discard if different
77 * Record events, devicenum, utime
78 * This should give us a list of devices for the array
79 * We should collect the most recent event and utime numbers
80 *
81 * Count disks with recent enough event count
82 * While force && !enough disks
83 * Choose newest rejected disks, update event count
84 * mark clean and rewrite superblock
85 * If recent kernel:
86 * SET_ARRAY_INFO
87 * foreach device with recent events : ADD_NEW_DISK
88 * if runstop == 1 || "enough" disks and runstop==0 -> RUN_ARRAY
89 * If old kernel:
90 * Check the device numbers in superblock are right
91 * update superblock if any changes
92 * START_ARRAY
93 *
94 */
95 int old_linux = 0;
96 int vers;
97 mdu_array_info_t array;
98 mdp_super_t first_super, super;
99 struct {
100 char *devname;
101 int major, minor;
102 int oldmajor, oldminor;
103 long long events;
104 time_t utime;
105 int uptodate;
106 int state;
107 int raid_disk;
108 } *devices;
109 int *best = NULL; /* indexed by raid_disk */
110 int bestcnt = 0;
111 int devcnt = 0, okcnt, sparecnt;
112 int req_cnt;
113 int i;
114 int most_recent = 0;
115 int chosen_drive;
116 int change = 0;
117 int inargv = 0;
118 int start_partial_ok = force || devlist==NULL;
119 int num_devs;
120 mddev_dev_t tmpdev;
121
122 vers = md_get_version(mdfd);
123 if (vers <= 0) {
124 fprintf(stderr, Name ": %s appears not to be an md device.\n", mddev);
125 return 1;
126 }
127 if (vers < 9000) {
128 fprintf(stderr, Name ": Assemble requires driver version 0.90.0 or later.\n"
129 " Upgrade your kernel or try --build\n");
130 return 1;
131 }
132 if (get_linux_version() < 2004000)
133 old_linux = 1;
134
135 if (ioctl(mdfd, GET_ARRAY_INFO, &array)>=0) {
136 fprintf(stderr, Name ": device %s already active - cannot assemble it\n",
137 mddev);
138 return 1;
139 }
140 ioctl(mdfd, STOP_ARRAY, NULL); /* just incase it was started but has no content */
141
142 /*
143 * If any subdevs are listed, then any that don't
144 * match ident are discarded. Remainder must all match and
145 * become the array.
146 * If no subdevs, then we scan all devices in the config file, but
147 * there must be something in the identity
148 */
149
150 if (!devlist &&
151 ident->uuid_set == 0 &&
152 ident->super_minor < 0 &&
153 ident->devices == NULL) {
154 fprintf(stderr, Name ": No identity information available for %s - cannot assemble.\n",
155 mddev);
156 return 1;
157 }
158 if (devlist == NULL)
159 devlist = conf_get_devs(conffile);
160 else inargv = 1;
161
162 tmpdev = devlist; num_devs = 0;
163 while (tmpdev) {
164 num_devs++;
165 tmpdev = tmpdev->next;
166 }
167 best = malloc(num_devs * sizeof(*best));
168 devices = malloc(num_devs * sizeof(*devices));
169
170 first_super.md_magic = 0;
171 for (i=0; i<num_devs; i++)
172 best[i] = -1;
173
174 if (verbose)
175 fprintf(stderr, Name ": looking for devices for %s\n",
176 mddev);
177
178 while ( devlist) {
179 char *devname;
180 int this_uuid[4];
181 int dfd;
182 struct stat stb;
183 int havesuper=0;
184
185 devname = devlist->devname;
186 devlist = devlist->next;
187
188 if (ident->devices &&
189 !match_oneof(ident->devices, devname)) {
190 if (inargv || verbose)
191 fprintf(stderr, Name ": %s is not one of %s\n", devname, ident->devices);
192 continue;
193 }
194
195 dfd = open(devname, O_RDONLY, 0);
196 if (dfd < 0) {
197 if (inargv || verbose)
198 fprintf(stderr, Name ": cannot open device %s: %s\n",
199 devname, strerror(errno));
200 } else if (fstat(dfd, &stb)< 0) {
201 /* Impossible! */
202 fprintf(stderr, Name ": fstat failed for %s: %s\n",
203 devname, strerror(errno));
204 close(dfd);
205 } else if ((stb.st_mode & S_IFMT) != S_IFBLK) {
206 fprintf(stderr, Name ": %s is not a block device.\n",
207 devname);
208 close(dfd);
209 } else if (load_super(dfd, &super)) {
210 if (inargv || verbose)
211 fprintf( stderr, Name ": no RAID superblock on %s\n",
212 devname);
213 close(dfd);
214 } else {
215 havesuper =1;
216 uuid_from_super(this_uuid, &super);
217 close(dfd);
218 }
219
220 if (ident->uuid_set &&
221 (!havesuper || same_uuid(this_uuid, ident->uuid)==0)) {
222 if (inargv || verbose)
223 fprintf(stderr, Name ": %s has wrong uuid.\n",
224 devname);
225 continue;
226 }
227 if (ident->super_minor >= 0 &&
228 (!havesuper || ident->super_minor != super.md_minor)) {
229 if (inargv || verbose)
230 fprintf(stderr, Name ": %s has wrong super-minor.\n",
231 devname);
232 continue;
233 }
234 if (ident->level != -10 &&
235 (!havesuper|| ident->level != super.level)) {
236 if (inargv || verbose)
237 fprintf(stderr, Name ": %s has wrong raid level.\n",
238 devname);
239 continue;
240 }
241 if (ident->raid_disks != -1 &&
242 (!havesuper || ident->raid_disks!= super.raid_disks)) {
243 if (inargv || verbose)
244 fprintf(stderr, Name ": %s requires wrong number of drives.\n",
245 devname);
246 continue;
247 }
248
249 /* If we are this far, then we are commited to this device.
250 * If the super_block doesn't exist, or doesn't match others,
251 * then we cannot continue
252 */
253
254 if (!havesuper) {
255 fprintf(stderr, Name ": %s has no superblock - assembly aborted\n",
256 devname);
257 return 1;
258 }
259 if (compare_super(&first_super, &super)) {
260 fprintf(stderr, Name ": superblock on %s doesn't match others - assembly aborted\n",
261 devname);
262 return 1;
263 }
264
265
266 /* this is needed until we get a more relaxed super block format */
267 if (devcnt >= MD_SB_DISKS) {
268 fprintf(stderr, Name ": ouch - too many devices appear to be in this array. Ignoring %s\n",
269 devname);
270 continue;
271 }
272
273 /* looks like a good enough match to update the super block if needed */
274 if (update) {
275 if (strcmp(update, "sparc2.2")==0 ) {
276 /* 2.2 sparc put the events in the wrong place
277 * So we copy the tail of the superblock
278 * up 4 bytes before continuing
279 */
280 __u32 *sb32 = (__u32*)&super;
281 memcpy(sb32+MD_SB_GENERIC_CONSTANT_WORDS+7,
282 sb32+MD_SB_GENERIC_CONSTANT_WORDS+7+1,
283 (MD_SB_WORDS - (MD_SB_GENERIC_CONSTANT_WORDS+7+1))*4);
284 fprintf (stderr, Name ": adjusting superblock of %s for 2.2/sparc compatability.\n",
285 devname);
286 }
287 if (strcmp(update, "super-minor") ==0) {
288 struct stat stb2;
289 fstat(mdfd, &stb2);
290 super.md_minor = MINOR(stb2.st_rdev);
291 if (verbose)
292 fprintf(stderr, Name ": updating superblock of %s with minor number %d\n",
293 devname, super.md_minor);
294 }
295 if (strcmp(update, "summaries") == 0) {
296 /* set nr_disks, active_disks, working_disks,
297 * failed_disks, spare_disks based on disks[]
298 * array in superblock.
299 * Also make sure extra slots aren't 'failed'
300 */
301 super.nr_disks = super.active_disks =
302 super.working_disks = super.failed_disks =
303 super.spare_disks = 0;
304 for (i=0; i < MD_SB_DISKS ; i++)
305 if (super.disks[i].major ||
306 super.disks[i].minor) {
307 int state = super.disks[i].state;
308 if (state & (1<<MD_DISK_REMOVED))
309 continue;
310 super.nr_disks++;
311 if (state & (1<<MD_DISK_ACTIVE))
312 super.active_disks++;
313 if (state & (1<<MD_DISK_FAULTY))
314 super.failed_disks++;
315 else
316 super.working_disks++;
317 if (state == 0)
318 super.spare_disks++;
319 } else if (i >= super.raid_disks && super.disks[i].number == 0)
320 super.disks[i].state = 0;
321 }
322 super.sb_csum = calc_sb_csum(&super);
323 dfd = open(devname, O_RDWR, 0);
324 if (dfd < 0)
325 fprintf(stderr, Name ": Cannot open %s for superblock update\n",
326 devname);
327 else if (store_super(dfd, &super))
328 fprintf(stderr, Name ": Could not re-write superblock on %s.\n",
329 devname);
330 if (dfd >= 0)
331 close(dfd);
332 }
333
334 if (verbose)
335 fprintf(stderr, Name ": %s is identified as a member of %s, slot %d.\n",
336 devname, mddev, super.this_disk.raid_disk);
337 devices[devcnt].devname = devname;
338 devices[devcnt].major = MAJOR(stb.st_rdev);
339 devices[devcnt].minor = MINOR(stb.st_rdev);
340 devices[devcnt].oldmajor = super.this_disk.major;
341 devices[devcnt].oldminor = super.this_disk.minor;
342 devices[devcnt].events = md_event(&super);
343 devices[devcnt].utime = super.utime;
344 devices[devcnt].raid_disk = super.this_disk.raid_disk;
345 devices[devcnt].uptodate = 0;
346 devices[devcnt].state = super.this_disk.state;
347 if (most_recent < devcnt) {
348 if (devices[devcnt].events
349 > devices[most_recent].events)
350 most_recent = devcnt;
351 }
352 if (super.level == -4)
353 /* with multipath, the raid_disk from the superblock is meaningless */
354 i = devcnt;
355 else
356 i = devices[devcnt].raid_disk;
357 if (i>=0 && i < 10000) {
358 if (i >= bestcnt) {
359 int newbestcnt = i+10;
360 int *newbest = malloc(sizeof(int)*newbestcnt);
361 int c;
362 for (c=0; c < newbestcnt; c++)
363 if (c < bestcnt)
364 newbest[c] = best[c];
365 else
366 newbest[c] = -1;
367 if (best)free(best);
368 best = newbest;
369 bestcnt = newbestcnt;
370 }
371 if (best[i] == -1
372 || devices[best[i]].events < devices[devcnt].events)
373 best[i] = devcnt;
374 }
375 devcnt++;
376 }
377
378 if (devcnt == 0) {
379 fprintf(stderr, Name ": no devices found for %s\n",
380 mddev);
381 return 1;
382 }
383 /* now we have some devices that might be suitable.
384 * I wonder how many
385 */
386 okcnt = 0;
387 sparecnt=0;
388 for (i=0; i< bestcnt ;i++) {
389 int j = best[i];
390 int event_margin = !force;
391 if (j < 0) continue;
392 /* note: we ignore error flags in multipath arrays
393 * as they don't make sense
394 */
395 if (first_super.level != -4)
396 if (!(devices[j].state & (1<<MD_DISK_SYNC))) {
397 if (!(devices[j].state & (1<<MD_DISK_FAULTY)))
398 sparecnt++;
399 continue;
400 }
401 if (devices[j].events+event_margin >=
402 devices[most_recent].events) {
403 devices[j].uptodate = 1;
404 if (i < first_super.raid_disks)
405 okcnt++;
406 else
407 sparecnt++;
408 }
409 }
410 while (force && !enough(first_super.level, first_super.raid_disks, okcnt)) {
411 /* Choose the newest best drive which is
412 * not up-to-date, update the superblock
413 * and add it.
414 */
415 int fd;
416 chosen_drive = -1;
417 for (i=0; i<first_super.raid_disks && i < bestcnt; i++) {
418 int j = best[i];
419 if (j>=0 &&
420 !devices[j].uptodate &&
421 devices[j].events > 0 &&
422 (chosen_drive < 0 ||
423 devices[j].events > devices[chosen_drive].events))
424 chosen_drive = j;
425 }
426 if (chosen_drive < 0)
427 break;
428 fprintf(stderr, Name ": forcing event count in %s(%d) from %d upto %d\n",
429 devices[chosen_drive].devname, devices[chosen_drive].raid_disk,
430 (int)(devices[chosen_drive].events),
431 (int)(devices[most_recent].events));
432 fd = open(devices[chosen_drive].devname, O_RDWR);
433 if (fd < 0) {
434 fprintf(stderr, Name ": Couldn't open %s for write - not updating\n",
435 devices[chosen_drive].devname);
436 devices[chosen_drive].events = 0;
437 continue;
438 }
439 if (load_super(fd, &super)) {
440 close(fd);
441 fprintf(stderr, Name ": RAID superblock disappeared from %s - not updating.\n",
442 devices[chosen_drive].devname);
443 devices[chosen_drive].events = 0;
444 continue;
445 }
446 super.events_hi = (devices[most_recent].events>>32)&0xFFFFFFFF;
447 super.events_lo = (devices[most_recent].events)&0xFFFFFFFF;
448 if (super.level == 5 || super.level == 4) {
449 /* need to force clean */
450 super.state = (1<<MD_SB_CLEAN);
451 }
452 super.sb_csum = calc_sb_csum(&super);
453 /*DRYRUN*/ if (store_super(fd, &super)) {
454 close(fd);
455 fprintf(stderr, Name ": Could not re-write superblock on %s\n",
456 devices[chosen_drive].devname);
457 devices[chosen_drive].events = 0;
458 continue;
459 }
460 close(fd);
461 devices[chosen_drive].events = devices[most_recent].events;
462 devices[chosen_drive].uptodate = 1;
463 okcnt++;
464 }
465
466 /* Now we want to look at the superblock which the kernel will base things on
467 * and compare the devices that we think are working with the devices that the
468 * superblock thinks are working.
469 * If there are differences and --force is given, then update this chosen
470 * superblock.
471 */
472 chosen_drive = -1;
473 for (i=0; chosen_drive < 0 && i<bestcnt; i++) {
474 int j = best[i];
475 int fd;
476 if (j<0)
477 continue;
478 if (!devices[j].uptodate)
479 continue;
480 chosen_drive = j;
481 if ((fd=open(devices[j].devname, O_RDONLY))< 0) {
482 fprintf(stderr, Name ": Cannot open %s: %s\n",
483 devices[j].devname, strerror(errno));
484 return 1;
485 }
486 if (load_super(fd, &super)) {
487 close(fd);
488 fprintf(stderr, Name ": RAID superblock has disappeared from %s\n",
489 devices[j].devname);
490 return 1;
491 }
492 close(fd);
493 }
494
495 for (i=0; i<bestcnt; i++) {
496 int j = best[i];
497 int desired_state;
498
499 if (i < super.raid_disks)
500 desired_state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC);
501 else
502 desired_state = 0;
503
504 if (j<0)
505 continue;
506 if (!devices[j].uptodate)
507 continue;
508 #if 0
509 This doesnt work yet
510 if (devices[j].major != super.disks[i].major ||
511 devices[j].minor != super.disks[i].minor) {
512 change |= 1;
513 super.disks[i].major = devices[j].major;
514 super.disks[i].minor = devices[j].minor;
515 }
516 #endif
517 if (devices[j].oldmajor != super.disks[i].major ||
518 devices[j].oldminor != super.disks[i].minor) {
519 change |= 2;
520 super.disks[i].major = devices[j].oldmajor;
521 super.disks[i].minor = devices[j].oldminor;
522 }
523 if (devices[j].uptodate &&
524 (super.disks[i].state != desired_state)) {
525 if (force) {
526 fprintf(stderr, Name ": "
527 "clearing FAULTY flag for device %d in %s for %s\n",
528 j, mddev, devices[j].devname);
529 super.disks[i].state = desired_state;
530 change |= 2;
531 } else {
532 fprintf(stderr, Name ": "
533 "device %d in %s has wrong state in superblock, but %s seems ok\n",
534 i, mddev, devices[j].devname);
535 }
536 }
537 if (!devices[j].uptodate &&
538 !(super.disks[i].state & (1 << MD_DISK_FAULTY))) {
539 fprintf(stderr, Name ": devices %d of %s is not marked FAULTY in superblock, but cannot be found\n",
540 i, mddev);
541 }
542 }
543 if (force && (super.level == 4 || super.level == 5) &&
544 okcnt == super.raid_disks-1) {
545 super.state = (1<< MD_SB_CLEAN);
546 change |= 2;
547 }
548
549 if ((force && (change & 2))
550 || (old_linux && (change & 1))) {
551 int fd;
552 super.sb_csum = calc_sb_csum(&super);
553 fd = open(devices[chosen_drive].devname, O_RDWR);
554 if (fd < 0) {
555 fprintf(stderr, Name ": Could open %s for write - cannot Assemble array.\n",
556 devices[chosen_drive].devname);
557 return 1;
558 }
559 if (store_super(fd, &super)) {
560 close(fd);
561 fprintf(stderr, Name ": Could not re-write superblock on %s\n",
562 devices[chosen_drive].devname);
563 return 1;
564 }
565 close(fd);
566 change = 0;
567 }
568
569 /* count number of in-sync devices according to the superblock.
570 * We must have this number to start the array without -s or -R
571 */
572 req_cnt = 0;
573 for (i=0; i<MD_SB_DISKS; i++)
574 if ((first_super.disks[i].state & (1<<MD_DISK_SYNC)) &&
575 (first_super.disks[i].state & (1<<MD_DISK_ACTIVE)) &&
576 !(first_super.disks[i].state & (1<<MD_DISK_FAULTY)))
577 req_cnt ++;
578
579
580 /* Almost ready to actually *do* something */
581 if (!old_linux) {
582 if (ioctl(mdfd, SET_ARRAY_INFO, NULL) != 0) {
583 fprintf(stderr, Name ": SET_ARRAY_INFO failed for %s: %s\n",
584 mddev, strerror(errno));
585 return 1;
586 }
587 /* First, add the raid disks, but add the chosen one last */
588 for (i=0; i<= bestcnt; i++) {
589 int j;
590 if (i < bestcnt) {
591 j = best[i];
592 if (j == chosen_drive)
593 continue;
594 } else
595 j = chosen_drive;
596
597 if (j >= 0 /* && devices[j].uptodate */) {
598 mdu_disk_info_t disk;
599 memset(&disk, 0, sizeof(disk));
600 disk.major = devices[j].major;
601 disk.minor = devices[j].minor;
602 if (ioctl(mdfd, ADD_NEW_DISK, &disk)!=0) {
603 fprintf(stderr, Name ": failed to add %s to %s: %s\n",
604 devices[j].devname,
605 mddev,
606 strerror(errno));
607 if (i < first_super.raid_disks)
608 okcnt--;
609 else
610 sparecnt--;
611 } else if (verbose)
612 fprintf(stderr, Name ": added %s to %s as %d\n",
613 devices[j].devname, mddev, devices[j].raid_disk);
614 } else if (verbose && i < first_super.raid_disks)
615 fprintf(stderr, Name ": no uptodate device for slot %d of %s\n",
616 i, mddev);
617 }
618
619 if (runstop == 1 ||
620 (runstop == 0 &&
621 ( enough(first_super.level, first_super.raid_disks, okcnt) &&
622 (okcnt >= req_cnt || start_partial_ok)
623 ))) {
624 if (ioctl(mdfd, RUN_ARRAY, NULL)==0) {
625 fprintf(stderr, Name ": %s has been started with %d drive%s",
626 mddev, okcnt, okcnt==1?"":"s");
627 if (okcnt < first_super.raid_disks)
628 fprintf(stderr, " (out of %d)", first_super.raid_disks);
629 if (sparecnt)
630 fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s");
631 fprintf(stderr, ".\n");
632 return 0;
633 }
634 fprintf(stderr, Name ": failed to RUN_ARRAY %s: %s\n",
635 mddev, strerror(errno));
636 return 1;
637 }
638 if (runstop == -1) {
639 fprintf(stderr, Name ": %s assembled from %d drive%s, but not started.\n",
640 mddev, okcnt, okcnt==1?"":"s");
641 return 0;
642 }
643 fprintf(stderr, Name ": %s assembled from %d drive%s", mddev, okcnt, okcnt==1?"":"s");
644 if (sparecnt)
645 fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s");
646 if (!enough(first_super.level, first_super.raid_disks, okcnt))
647 fprintf(stderr, " - not enough to start the array.\n");
648 else {
649 if (req_cnt == first_super.raid_disks)
650 fprintf(stderr, " - need all %d to start it", req_cnt);
651 else
652 fprintf(stderr, " - need %d of %d to start", req_cnt, first_super.raid_disks);
653 fprintf(stderr, " (use --run to insist).\n");
654 }
655 return 1;
656 } else {
657 /* The "chosen_drive" is a good choice, and if necessary, the superblock has
658 * been updated to point to the current locations of devices.
659 * so we can just start the array
660 */
661 unsigned long dev;
662 dev = MKDEV(devices[chosen_drive].major,
663 devices[chosen_drive].minor);
664 if (ioctl(mdfd, START_ARRAY, dev)) {
665 fprintf(stderr, Name ": Cannot start array: %s\n",
666 strerror(errno));
667 }
668
669 }
670 return 0;
671 }